ggml-org
diff --git a/‎common/arg.cpp
Lines changed: 35 additions & 44 deletions b/‎common/arg.cpp
Lines changed: 35 additions & 44 deletions
diff --git a/‎common/common.h
Lines changed: 13 additions & 15 deletions b/‎common/common.h
Lines changed: 13 additions & 15 deletions
diff --git a/‎examples/diffusion/CMakeLists.txt
Lines changed: 2 additions & 8 deletions b/‎examples/diffusion/CMakeLists.txt
Lines changed: 2 additions & 8 deletions
diff --git a/‎examples/diffusion/README.md
Lines changed: 10 additions & 8 deletions b/‎examples/diffusion/README.md
Lines changed: 10 additions & 8 deletions
@@ -3438,59 +3438,50 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}));
 
-    // shared diffusion parameters
     add_opt(common_arg(
         { "--diffusion-steps" }, "N",
-        string_format("number of diffusion steps (default: %d)", params.diffusion_dream.steps),
-        [](common_params & params, int value) {
-            params.diffusion_dream.steps = value;
-            params.diffusion_llada.steps = value;
-        }
-    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION_DREAM, LLAMA_EXAMPLE_DIFFUSION_LLADA }));
+        string_format("number of diffusion steps (default: %d)", params.diffusion.steps),
+        [](common_params & params, int value) { params.diffusion.steps = value; }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
     add_opt(common_arg(
         { "--diffusion-visual" },
         string_format("enable visual diffusion mode (show progressive generation) (default: %s)",
-                      params.diffusion_dream.visual_mode ? "true" : "false"),
-        [](common_params & params) {
-            params.diffusion_dream.visual_mode = true;
-            params.diffusion_llada.visual_mode = true;
-        }
-    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION_DREAM, LLAMA_EXAMPLE_DIFFUSION_LLADA }));
+                      params.diffusion.visual_mode ? "true" : "false"),
+        [](common_params & params) { params.diffusion.visual_mode = true; }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
 
-    // DREAM-specific diffusion parameters
     add_opt(common_arg(
-        { "--diffusion-eps" }, "F",
-        string_format("epsilon for timesteps (default: %.6f)", (double) params.diffusion_dream.eps),
-        [](common_params & params, const std::string & value) { params.diffusion_dream.eps = std::stof(value); }
-    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION_DREAM }));
+        { "--diffusion-dream-eps" }, "F",
+        string_format("epsilon for timesteps (default: %.6f)", (double) params.diffusion.eps),
+        [](common_params & params, const std::string & value) { params.diffusion.eps = std::stof(value); }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
     add_opt(common_arg(
-        { "--diffusion-algorithm" }, "N",
+        { "--diffusion-dream-algorithm" }, "N",
         string_format("diffusion algorithm: 0=ORIGIN, 1=MASKGIT_PLUS, 2=TOPK_MARGIN, 3=ENTROPY (default: %d)",
-                      params.diffusion_dream.algorithm),
-        [](common_params & params, int value) { params.diffusion_dream.algorithm = value; }
-    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION_DREAM }));
-    add_opt(common_arg(
-        { "--diffusion-alg-temp" }, "F",
-        string_format("algorithm temperature (default: %.3f)", (double) params.diffusion_dream.alg_temp),
-        [](common_params & params, const std::string & value) { params.diffusion_dream.alg_temp = std::stof(value); }
-    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION_DREAM }));
-
-    // LLADA-specific diffusion parameters
-    add_opt(common_arg(
-        { "--diffusion-block-length" }, "N",
-        string_format("block length for generation (default: %d)", params.diffusion_llada.block_length),
-        [](common_params & params, int value) { params.diffusion_llada.block_length = value; }
-    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION_LLADA }));
-    add_opt(common_arg(
-        { "--diffusion-cfg-scale" }, "F",
-        string_format("classifier-free guidance scale (default: %.3f)", (double) params.diffusion_llada.cfg_scale),
-        [](common_params & params, const std::string & value) { params.diffusion_llada.cfg_scale = std::stof(value); }
-    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION_LLADA }));
-    add_opt(common_arg(
-        { "--diffusion-algorithm" }, "N",
-        string_format("remasking algorithm: 0=LOW_CONFIDENCE, 1=RANDOM (default: %d)", params.diffusion_llada.remasking),
-        [](common_params & params, int value) { params.diffusion_llada.remasking = value; }
-    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION_LLADA }));
+                      params.diffusion.algorithm),
+        [](common_params & params, int value) { params.diffusion.algorithm = value; }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+    add_opt(common_arg(
+        { "--diffusion-dream-alg-temp" }, "F",
+        string_format("dream algorithm temperature (default: %.3f)", (double) params.diffusion.alg_temp),
+        [](common_params & params, const std::string & value) { params.diffusion.alg_temp = std::stof(value); }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+
+    add_opt(common_arg(
+        { "--diffusion-llada-block-length" }, "N",
+        string_format("llada block length for generation (default: %d)", params.diffusion.block_length),
+        [](common_params & params, int value) { params.diffusion.block_length = value; }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+    add_opt(common_arg(
+        { "--diffusion-llada-cfg-scale" }, "F",
+        string_format("llada classifier-free guidance scale (default: %.3f)", (double) params.diffusion.cfg_scale),
+        [](common_params & params, const std::string & value) { params.diffusion.cfg_scale = std::stof(value); }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+    add_opt(common_arg(
+        { "--diffusion-llada-algorithm" }, "N",
+        string_format("llada remasking algorithm: 0=LOW_CONFIDENCE, 1=RANDOM (default: %d)", params.diffusion.remasking),
+        [](common_params & params, int value) { params.diffusion.remasking = value; }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
 
     return ctx_arg;
 }
@@ -81,8 +81,7 @@ enum llama_example {
     LLAMA_EXAMPLE_LOOKUP,
     LLAMA_EXAMPLE_PARALLEL,
     LLAMA_EXAMPLE_TTS,
-    LLAMA_EXAMPLE_DIFFUSION_DREAM,
-    LLAMA_EXAMPLE_DIFFUSION_LLADA,
+    LLAMA_EXAMPLE_DIFFUSION,
 
     LLAMA_EXAMPLE_COUNT,
 };
@@ -220,20 +219,20 @@ struct common_params_vocoder {
     bool use_guide_tokens = false; // enable guide tokens to improve TTS accuracy            // NOLINT
 };
 
-struct common_params_diffusion_dream {
-    int32_t steps       = 64;     // number of diffusion steps
-    float   eps         = 1e-3f;  // epsilon for timesteps
-    int32_t algorithm   = 0;      // diffusion algorithm (0=ORIGIN, 1=MASKGIT_PLUS, 2=TOPK_MARGIN, 3=ENTROPY)
-    float   alg_temp    = 0.0f;   // algorithm temperature
-    bool    visual_mode = false;  // show progressive diffusion on screen
-};
+struct common_params_diffusion {
+    // Common parameters
+    int32_t steps         = 128;     // number of diffusion steps
+    bool    visual_mode   = false;  // show progressive diffusion on screen
 
-struct common_params_diffusion_llada {
-    int32_t steps         = 64;     // number of diffusion steps
+    // Dream-specific parameters
+    float   eps           = 1e-3f;  // epsilon for timesteps
+    int32_t algorithm     = 3;      // diffusion algorithm (0=ORIGIN, 1=MASKGIT_PLUS, 2=TOPK_MARGIN, 3=ENTROPY)
+    float   alg_temp      = 0.0f;   // algorithm temperature
+
+    // LLaDA-specific parameters
     int32_t block_length  = 32;     // block length for generation
     float   cfg_scale     = 0.2f;   // classifier-free guidance scale
-    int32_t remasking     = 0;      // remasking algorithm: 0=LOW_CONFIDENCE, 1=RANDOM
-    bool    visual_mode   = false;  // show progressive diffusion on screen
+    int32_t remasking     = 1;      // remasking algorithm: 0=LOW_CONFIDENCE, 1=RANDOM
 };
 
 enum common_reasoning_format {
@@ -287,8 +286,7 @@ struct common_params {
     struct common_params_sampling    sampling;
     struct common_params_speculative speculative;
     struct common_params_vocoder            vocoder;
-    struct common_params_diffusion_dream   diffusion_dream;
-    struct common_params_diffusion_llada   diffusion_llada;
+    struct common_params_diffusion         diffusion;
 
     struct common_params_model model;
 
 
@@ -1,11 +1,5 @@
-set(TARGET llama-diffusion-dream-cli)
-add_executable(${TARGET} diffusion-dream-cli.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE llama common ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_17)
-
-set(TARGET llama-diffusion-llada-cli)
-add_executable(${TARGET} diffusion-llada-cli.cpp)
+set(TARGET llama-diffusion-cli)
+add_executable(${TARGET} diffusion-cli.cpp)
 install(TARGETS ${TARGET} RUNTIME)
 target_link_libraries(${TARGET} PRIVATE llama common ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_17)
@@ -2,35 +2,37 @@
 
 This directory contains implementations for diffusion-based text generation using two different model architectures: **Dream** and **LLaDA-8B**. Both models use iterative denoising processes to generate text, but employ different sampling strategies and algorithms.
 
-## Supported Models
+## Supported Architectures
 
-### 1. Dream Model (`llama-diffusion-dream-cli`)
+### 1. Dream
 
+Example models:
 - https://huggingface.co/Dream-org/Dream-v0-Base-7B
-- Original PR - https://github.com/ggml-org/llama.cpp/pull/14644
+- PR: https://github.com/ggml-org/llama.cpp/pull/14644
 
-The Dream model supports four different sampling algorithms controlled by the `--diffusion-algorithm` parameter:
+The Dream model supports four different sampling algorithms controlled by the `--diffusion-dream-algorithm` parameter:
 
 1. **ORIGIN (0)** - Original diffusion algorithm
    - Uses probability transfer based on timestep ratios
-   - Default algorithm with standard confidence-based token selection
 
 2. **MASKGIT_PLUS (1)** - Enhanced MaskGIT sampling
    - Improved version of the MaskGIT algorithm
 
 3. **TOPK_MARGIN (2)** - Top-K margin-based sampling
    - Confidence calculated as the margin between top-1 and top-2 probabilities
 
-4. **ENTROPY (3)** - Entropy-based sampling (recommended)
+4. **ENTROPY (3)** - Entropy-based sampling (default, recommended)
    - Uses entropy calculation for confidence estimation
 
-### 2. LLaDA-8B Model (`llama-diffusion-llada-cli`)
+### 2. LLaDA
 
+Example models:
 - https://huggingface.co/GSAI-ML/LLaDA-8B-Instruct
+- PR: https://github.com/ggml-org/llama.cpp/pull/14771
 
 ### LLaDA Model Remasking Strategies
 
-The LLaDA model uses two remasking approaches controlled by the `--diffusion-algorithm` parameter:
+The LLaDA model uses two remasking approaches controlled by the `--diffusion-llada-algorithm` parameter:
 
 1. **REMASKING_LOW_CONFIDENCE (0)** - Default strategy
    - Remasks tokens with lowest confidence scores