Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions examples/scripts/alignprop.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "Pillow>=9.4.0",
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI: 9.4.0 is the minimum Pillow version that supports EXIF metadata.

# ]
# ///

"""
Total Batch size = 128 = 4 (num_gpus) * 8 (per_device_batch) * 4 (accumulation steps)
Feel free to reduce the batch size or increase truncated_rand_backprop_min to a higher value to reduce memory usage.
Expand Down
7 changes: 7 additions & 0 deletions examples/scripts/bco.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "peft",
# ]
# ///

"""
Run the BCO training script with the commands below. In general, the optimal configuration for BCO will be similar to that of KTO.

Expand Down
7 changes: 7 additions & 0 deletions examples/scripts/cpo.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "peft",
# ]
# ///

"""
Run the CPO training script with the following command with some example arguments.
In general, the optimal configuration for CPO will be similar to that of DPO:
Expand Down
6 changes: 6 additions & 0 deletions examples/scripts/ddpo.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# ]
# ///

"""
python examples/scripts/ddpo.py \
--num_epochs=200 \
Expand Down
7 changes: 7 additions & 0 deletions examples/scripts/dpo_online.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "peft",
# ]
# ///

"""
Usage:

Expand Down
8 changes: 8 additions & 0 deletions examples/scripts/dpo_vlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "peft",
# "Pillow>=9.4.0",
# ]
# ///

"""
Without dataset streaming:

Expand Down
7 changes: 7 additions & 0 deletions examples/scripts/evals/judge_tldr.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "vllm",
# ]
# ///

from dataclasses import dataclass, field
from typing import Optional

Expand Down
7 changes: 7 additions & 0 deletions examples/scripts/gkd.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "peft",
# ]
# ///

"""
# Full training:
python examples/scripts/gkd.py \
Expand Down
9 changes: 9 additions & 0 deletions examples/scripts/grpo_vlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "peft",
# "math-verify",
# "latex2sympy2_extended",
# ]
# ///

"""
pip install math_verify

Expand Down
7 changes: 7 additions & 0 deletions examples/scripts/kto.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "peft",
# ]
# ///

"""
Run the KTO training script with the commands below. In general, the optimal configuration for KTO will be similar to that of DPO.

Expand Down
6 changes: 6 additions & 0 deletions examples/scripts/nash_md.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# ]
# ///

"""
Usage:

Expand Down
7 changes: 7 additions & 0 deletions examples/scripts/orpo.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "peft",
# ]
# ///

"""
Run the ORPO training script with the following command with some example arguments.
In general, the optimal configuration for ORPO will be similar to that of DPO without the need for a reference model:
Expand Down
7 changes: 7 additions & 0 deletions examples/scripts/ppo/ppo.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "peft",
# ]
# ///

import shutil

import torch
Expand Down
7 changes: 7 additions & 0 deletions examples/scripts/ppo/ppo_tldr.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "peft",
# ]
# ///

import shutil

import torch
Expand Down
6 changes: 6 additions & 0 deletions examples/scripts/prm.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# ]
# ///

"""
Full training:
python examples/scripts/prm.py \
Expand Down
6 changes: 6 additions & 0 deletions examples/scripts/reward_modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# ]
# ///

"""
Full training:
python examples/scripts/reward_modeling.py \
Expand Down
6 changes: 6 additions & 0 deletions examples/scripts/rloo/rloo.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# ]
# ///

import shutil

from accelerate import PartialState
Expand Down
6 changes: 6 additions & 0 deletions examples/scripts/rloo/rloo_tldr.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# ]
# ///

import shutil

from accelerate import PartialState
Expand Down
6 changes: 6 additions & 0 deletions examples/scripts/sft_gemma3.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# ]
# ///

"""
Train Gemma-3 on the Codeforces COTS dataset.

Expand Down
9 changes: 9 additions & 0 deletions examples/scripts/sft_video_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "peft",
# "wandb",
# "qwen-vl-utils",
# ]
# ///

"""
Example usage:
accelerate launch \
Expand Down
7 changes: 7 additions & 0 deletions examples/scripts/sft_vlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "Pillow>=9.4.0",
# ]
# ///

"""
pip install pillow

Expand Down
7 changes: 7 additions & 0 deletions examples/scripts/sft_vlm_gemma3.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "Pillow>=9.4.0",
# ]
# ///

"""
Train Gemma-3 on the HuggingFaceH4/llava-instruct-mix-vsft dataset (single-image).

Expand Down
7 changes: 7 additions & 0 deletions examples/scripts/sft_vlm_smol_vlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "Pillow>=9.4.0",
# ]
# ///

"""
pip install pillow

Expand Down
6 changes: 6 additions & 0 deletions examples/scripts/xpo.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# ]
# ///

"""
Usage:

Expand Down
6 changes: 6 additions & 0 deletions trl/scripts/dpo.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# ]
# ///

"""
# Full training
```bash
Expand Down
6 changes: 6 additions & 0 deletions trl/scripts/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# ]
# ///

import os
import platform
from importlib.metadata import version
Expand Down
6 changes: 6 additions & 0 deletions trl/scripts/grpo.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# ]
# ///

import argparse
import importlib
import os
Expand Down
6 changes: 6 additions & 0 deletions trl/scripts/kto.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# ]
# ///

"""
Run the KTO training script with the commands below. In general, the optimal configuration for KTO will be similar to
that of DPO.
Expand Down
6 changes: 6 additions & 0 deletions trl/scripts/sft.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# ]
# ///

"""
# Full training
```
Expand Down
Loading