From 3d3493ee3d8b479dd33804686d087e03b8343fa0 Mon Sep 17 00:00:00 2001 From: peardox Date: Wed, 16 Apr 2025 18:41:18 +0100 Subject: [PATCH 01/14] Expose ggml_backend_load_best --- ggml/include/ggml-backend.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h index 64671495b38..0d9ebe88ce1 100644 --- a/ggml/include/ggml-backend.h +++ b/ggml/include/ggml-backend.h @@ -348,7 +348,12 @@ extern "C" { // CPU buffer types are always available GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size); GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void); - + + // Load Best exposed to allow loading of specific types of backend + // Notably this allows you to load only one specific backend ignoring all + // others (e.g. only load cuda - without cpu) + GGML_API ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) + #ifdef __cplusplus } #endif From 81965b51e4863fd7c2461512509a9c22d55cac33 Mon Sep 17 00:00:00 2001 From: peardox Date: Wed, 16 Apr 2025 19:06:11 +0100 Subject: [PATCH 02/14] Fix header ggml\include\ggml_backend.h --- ggml/include/ggml-backend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h index 0d9ebe88ce1..859c5570315 100644 --- a/ggml/include/ggml-backend.h +++ b/ggml/include/ggml-backend.h @@ -352,7 +352,7 @@ extern "C" { // Load Best exposed to allow loading of specific types of backend // Notably this allows you to load only one specific backend ignoring all // others (e.g. only load cuda - without cpu) - GGML_API ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) + GGML_API ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path); #ifdef __cplusplus } From bee63b81425df46715a78698f03f2cbc9b1009f2 Mon Sep 17 00:00:00 2001 From: peardox Date: Wed, 16 Apr 2025 19:18:10 +0100 Subject: [PATCH 03/14] if GGML_BACKEND_DL defined don't use whisper_load_backends in whisper.cpp --- src/whisper.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/whisper.cpp b/src/whisper.cpp index 2c83f7bab3b..c4c076a8b99 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -1313,8 +1313,10 @@ static size_t aheads_masks_nbytes(struct whisper_aheads_masks & aheads_masks) { static ggml_backend_t whisper_backend_init_gpu(const whisper_context_params & params) { ggml_log_set(g_state.log_callback, g_state.log_callback_user_data); + #ifndef GGML_BACKEND_DL whisper_load_backends(); - + #eneif + ggml_backend_dev_t dev = nullptr; int cnt = 0; @@ -4321,7 +4323,9 @@ static int whisper_has_openvino(void) { const char * whisper_print_system_info(void) { static std::string s; + #ifndef GGML_BACKEND_DL whisper_load_backends(); + #eneif s = ""; s += "WHISPER : "; @@ -6776,7 +6780,9 @@ WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads) { } WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) { + #ifndef GGML_BACKEND_DL whisper_load_backends(); + #eneif static std::string s; s = ""; From cc265ab4786b3896a29156241b982e801c2e8d94 Mon Sep 17 00:00:00 2001 From: peardox Date: Wed, 16 Apr 2025 19:29:32 +0100 Subject: [PATCH 04/14] Update examples bench, cli and stream to cater for alternate backend load --- examples/bench/bench.cpp | 7 +++++++ examples/cli/cli.cpp | 7 +++++++ examples/stream/stream.cpp | 7 +++++++ 3 files 
changed, 21 insertions(+) diff --git a/examples/bench/bench.cpp b/examples/bench/bench.cpp index 54f73110d42..1cd75067415 100644 --- a/examples/bench/bench.cpp +++ b/examples/bench/bench.cpp @@ -61,6 +61,13 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para static int whisper_bench_full(const whisper_params & params) { // whisper init + // If we're using a GGML_BACKEND_DL build we need to load backends before + // the model is initialised in whisper_init_from_file_with_params + // Failure to do this will result in attempts to query null devices + #ifdef GGML_BACKEND_DL + ggml_backend_load_all(); + #eneif + struct whisper_context_params cparams = whisper_context_default_params(); cparams.use_gpu = params.use_gpu; diff --git a/examples/cli/cli.cpp b/examples/cli/cli.cpp index 4b2b3521b80..a8dfcc1d12c 100644 --- a/examples/cli/cli.cpp +++ b/examples/cli/cli.cpp @@ -1002,6 +1002,13 @@ int main(int argc, char ** argv) { whisper_log_set(cb_log_disable, NULL); } + // If we're using a GGML_BACKEND_DL build we need to load backends before + // the model is initialised in whisper_init_from_file_with_params + // Failure to do this will result in attempts to query null devices + #ifdef GGML_BACKEND_DL + ggml_backend_load_all(); + #eneif + // whisper init struct whisper_context_params cparams = whisper_context_default_params(); diff --git a/examples/stream/stream.cpp b/examples/stream/stream.cpp index 65c6587db92..17048647cbb 100644 --- a/examples/stream/stream.cpp +++ b/examples/stream/stream.cpp @@ -155,6 +155,13 @@ int main(int argc, char ** argv) { exit(0); } + // If we're using a GGML_BACKEND_DL build we need to load backends before + // the model is initialised in whisper_init_from_file_with_params + // Failure to do this will result in attempts to query null devices + #ifdef GGML_BACKEND_DL + ggml_backend_load_all(); + #eneif + struct whisper_context_params cparams = whisper_context_default_params(); cparams.use_gpu = params.use_gpu; From b96679c5e2a023f0694705439856cf4d3f6d6896 Mon Sep 17 00:00:00 2001 From: peardox Date: Wed, 16 Apr 2025 19:41:56 +0100 Subject: [PATCH 05/14] Fix dumb type eneif -> endif --- examples/bench/bench.cpp | 2 +- examples/cli/cli.cpp | 2 +- examples/stream/stream.cpp | 2 +- src/whisper.cpp | 10 +++++----- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/bench/bench.cpp b/examples/bench/bench.cpp index 1cd75067415..33a864ee84a 100644 --- a/examples/bench/bench.cpp +++ b/examples/bench/bench.cpp @@ -66,7 +66,7 @@ static int whisper_bench_full(const whisper_params & params) { // Failure to do this will result in attempts to query null devices #ifdef GGML_BACKEND_DL ggml_backend_load_all(); - #eneif + #endif struct whisper_context_params cparams = whisper_context_default_params(); diff --git a/examples/cli/cli.cpp b/examples/cli/cli.cpp index a8dfcc1d12c..1e000643ddd 100644 --- a/examples/cli/cli.cpp +++ b/examples/cli/cli.cpp @@ -1007,7 +1007,7 @@ int main(int argc, char ** argv) { // Failure to do this will result in attempts to query null devices #ifdef GGML_BACKEND_DL ggml_backend_load_all(); - #eneif + #endif // whisper init diff --git a/examples/stream/stream.cpp b/examples/stream/stream.cpp index 17048647cbb..ce192b472d9 100644 --- a/examples/stream/stream.cpp +++ b/examples/stream/stream.cpp @@ -160,7 +160,7 @@ int main(int argc, char ** argv) { // Failure to do this will result in attempts to query null devices #ifdef GGML_BACKEND_DL ggml_backend_load_all(); - #eneif + #endif struct whisper_context_params 
cparams = whisper_context_default_params(); diff --git a/src/whisper.cpp b/src/whisper.cpp index c4c076a8b99..899cafecc4d 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -209,12 +209,12 @@ static bool ggml_graph_compute_helper( } static void whisper_load_backends() { -#ifdef GGML_BACKEND_DL + #ifndef GGML_BACKEND_DL static std::once_flag flag; std::call_once(flag, []() { ggml_backend_load_all(); }); -#endif + #endif } // TODO: move these functions to ggml-base with support for ggml-backend? @@ -1315,7 +1315,7 @@ static ggml_backend_t whisper_backend_init_gpu(const whisper_context_params & pa #ifndef GGML_BACKEND_DL whisper_load_backends(); - #eneif + #endif ggml_backend_dev_t dev = nullptr; @@ -4325,7 +4325,7 @@ const char * whisper_print_system_info(void) { #ifndef GGML_BACKEND_DL whisper_load_backends(); - #eneif + #endif s = ""; s += "WHISPER : "; @@ -6782,7 +6782,7 @@ WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads) { WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) { #ifndef GGML_BACKEND_DL whisper_load_backends(); - #eneif + #endif static std::string s; s = ""; From 3a46af03c388ae173b8cce590e105ef9bc5c0854 Mon Sep 17 00:00:00 2001 From: peardox Date: Wed, 16 Apr 2025 20:38:48 +0100 Subject: [PATCH 06/14] Add print_error_no_device and trigger if null backend passed. There may be more cases, two identified and trapped --- ggml/src/ggml-backend.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index 273075f4e54..85cadc0baf9 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -346,7 +346,18 @@ bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * return ggml_backend_dev_offload_op(backend->device, op); } +void print_error_no_device(void) { + fprintf(stderr, "You are attampting to use a null backend.\n"); + fprintf(stderr, "Please verify the backend is loaded before you try to use one\n"); + fprintf(stderr, "See bench.cpp / cli.cpp / stream.cpp for example\n"); +} + ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend) { + #ifdef GGML_BACKEND_DL + if (backend == nullptr) { + print_error_no_device(); + } + #endif return backend->device; } @@ -469,6 +480,11 @@ void ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_d } ggml_backend_reg_t ggml_backend_dev_backend_reg(ggml_backend_dev_t device) { + #ifdef GGML_BACKEND_DL + if (device == nullptr) { + print_error_no_device(); + } + #endif return device->reg; } From 5fb957a0e53d585cd3fbf0d93ca609a786b11551 Mon Sep 17 00:00:00 2001 From: peardox Date: Thu, 17 Apr 2025 04:29:40 +0100 Subject: [PATCH 07/14] Remove all references to whisper_load_backends --- ggml/src/ggml-backend.cpp | 4 ++-- src/whisper.cpp | 21 --------------------- 2 files changed, 2 insertions(+), 23 deletions(-) diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index 85cadc0baf9..8bce5f122af 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -353,7 +353,7 @@ void print_error_no_device(void) { } ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend) { - #ifdef GGML_BACKEND_DL + #ifdef WHISPER_BACKEND_DL if (backend == nullptr) { print_error_no_device(); } @@ -480,7 +480,7 @@ void ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_d } ggml_backend_reg_t ggml_backend_dev_backend_reg(ggml_backend_dev_t device) { - #ifdef GGML_BACKEND_DL + #ifdef WHISPER_BACKEND_DL if (device == nullptr) { print_error_no_device(); } 
diff --git a/src/whisper.cpp b/src/whisper.cpp index 899cafecc4d..fc5de5cdbf8 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -208,15 +208,6 @@ static bool ggml_graph_compute_helper( return t; } -static void whisper_load_backends() { - #ifndef GGML_BACKEND_DL - static std::once_flag flag; - std::call_once(flag, []() { - ggml_backend_load_all(); - }); - #endif -} - // TODO: move these functions to ggml-base with support for ggml-backend? static ggml_tensor * whisper_set_f32(struct ggml_tensor * t, float v) { @@ -1313,10 +1304,6 @@ static size_t aheads_masks_nbytes(struct whisper_aheads_masks & aheads_masks) { static ggml_backend_t whisper_backend_init_gpu(const whisper_context_params & params) { ggml_log_set(g_state.log_callback, g_state.log_callback_user_data); - #ifndef GGML_BACKEND_DL - whisper_load_backends(); - #endif - ggml_backend_dev_t dev = nullptr; int cnt = 0; @@ -4323,10 +4310,6 @@ static int whisper_has_openvino(void) { const char * whisper_print_system_info(void) { static std::string s; - #ifndef GGML_BACKEND_DL - whisper_load_backends(); - #endif - s = ""; s += "WHISPER : "; s += "COREML = " + std::to_string(whisper_has_coreml()) + " | "; @@ -6780,10 +6763,6 @@ WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads) { } WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) { - #ifndef GGML_BACKEND_DL - whisper_load_backends(); - #endif - static std::string s; s = ""; char strbuf[256]; From 0146f9813df3f7af002d3e12ea9052477644a8e7 Mon Sep 17 00:00:00 2001 From: peardox Date: Thu, 17 Apr 2025 06:25:03 +0100 Subject: [PATCH 08/14] Enable ggml_backend_load_best and allow only one device to be requested --- ggml/src/ggml-backend.cpp | 2 ++ src/whisper.cpp | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index 8bce5f122af..20bf140a297 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -1471,7 +1471,9 @@ ggml_backend_sched_t ggml_backend_sched_new( bool parallel) { GGML_ASSERT(n_backends > 0); GGML_ASSERT(n_backends <= GGML_SCHED_MAX_BACKENDS); +#ifndef GGML_BACKEND_DL // What's wrong with a GPU here ? 
GGML_ASSERT(ggml_backend_dev_type(ggml_backend_get_device(backends[n_backends - 1])) == GGML_BACKEND_DEVICE_TYPE_CPU); +#endif struct ggml_backend_sched * sched = (ggml_backend_sched *) calloc(1, sizeof(struct ggml_backend_sched)); diff --git a/src/whisper.cpp b/src/whisper.cpp index fc5de5cdbf8..cf09084a5c6 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -1361,6 +1361,10 @@ static std::vector whisper_backend_init(const whisper_context_pa ggml_backend_t backend_cpu = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr); if (backend_cpu == nullptr) { + #ifdef GGML_BACKEND_DL + // If not using a load_all it is possible CPU is null + return result; + #endif throw std::runtime_error("failed to initialize CPU backend"); } result.push_back(backend_cpu); @@ -1396,6 +1400,12 @@ static buft_list_t make_buft_list(whisper_context_params & params) { // CPU Extra auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU); + #ifdef GGML_BACKEND_DL + // If not using a load_all it is possible CPU is null + if(cpu_dev == nullptr) { + return buft_list; + } + #endif auto * cpu_reg = ggml_backend_dev_backend_reg(cpu_dev); auto get_extra_bufts_fn = (ggml_backend_dev_get_extra_bufts_t) ggml_backend_reg_get_proc_address(cpu_reg, "ggml_backend_dev_get_extra_bufts"); From e1ba4123d7822f32cd424391c84c59fdf522dbce Mon Sep 17 00:00:00 2001 From: peardox Date: Thu, 17 Apr 2025 11:04:29 +0100 Subject: [PATCH 09/14] Rename WHISPER_BACKEND_DL -> GGML_BACKEND_DL --- ggml/src/ggml-backend.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index 20bf140a297..2ccf269e235 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -353,7 +353,7 @@ void print_error_no_device(void) { } ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend) { - #ifdef WHISPER_BACKEND_DL + #ifdef GGML_BACKEND_DL if (backend == nullptr) { print_error_no_device(); } @@ -480,7 +480,7 @@ void ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_d } ggml_backend_reg_t ggml_backend_dev_backend_reg(ggml_backend_dev_t device) { - #ifdef WHISPER_BACKEND_DL + #ifdef GGML_BACKEND_DL if (device == nullptr) { print_error_no_device(); } From 1fffca3bf84ebb6ef3a8d53f671d2740329e7b5c Mon Sep 17 00:00:00 2001 From: peardox Date: Thu, 17 Apr 2025 14:11:10 +0100 Subject: [PATCH 10/14] Modify bench.cpp to add -d/--device option for GGML_BACKEND_DL only --- examples/bench/bench.cpp | 47 +++++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/examples/bench/bench.cpp b/examples/bench/bench.cpp index 33a864ee84a..9dfac30dfd5 100644 --- a/examples/bench/bench.cpp +++ b/examples/bench/bench.cpp @@ -11,6 +11,9 @@ struct whisper_params { int32_t what = 0; // what to benchmark: 0 - whisper encoder, 1 - memcpy, 2 - ggml_mul_mat std::string model = "models/ggml-base.en.bin"; + #ifdef GGML_BACKEND_DL + std::string device = ""; + #endif bool use_gpu = true; bool flash_attn = false; @@ -28,6 +31,9 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params } else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); } else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; } + #ifdef GGML_BACKEND_DL + else if (arg == "-d" || arg == "--device") { params.device = argv[++i]; } + #endif else if (arg == "-w" || arg == "--what") { params.what = atoi(argv[++i]); } else if (arg == "-ng" || arg == "--no-gpu") { 
params.use_gpu = false; } else if (arg == "-fa" || arg == "--flash-attn") { params.flash_attn = true; } @@ -46,15 +52,21 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para fprintf(stderr, "usage: %s [options]\n", argv[0]); fprintf(stderr, "\n"); fprintf(stderr, "options:\n"); - fprintf(stderr, " -h, --help [default] show this help message and exit\n"); - fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads); - fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str()); - fprintf(stderr, " -w N, --what N [%-7d] what to benchmark:\n", params.what); - fprintf(stderr, " %-7s 0 - whisper\n", ""); - fprintf(stderr, " %-7s 1 - memcpy\n", ""); - fprintf(stderr, " %-7s 2 - ggml_mul_mat\n", ""); - fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true"); - fprintf(stderr, " -fa, --flash-attn [%-7s] enable flash attention\n", params.flash_attn ? "true" : "false"); + fprintf(stderr, " -h, --help [default] show this help message and exit\n"); + fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads); + fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str()); + #ifdef GGML_BACKEND_DL + fprintf(stderr, " -d DEVICE, --device DEVICE [%-7s] device type\n" , params.device.c_str()); + fprintf(stderr, " valid devices : blas, cann, cpu, cuda, hip, kompute,\n"); + fprintf(stderr, " musa, opencl, rpc, sycl and vulkan\n"); + fprintf(stderr, " Optional libraries must be supplied\n"); + #endif + fprintf(stderr, " -w N, --what N [%-7d] what to benchmark:\n", params.what); + fprintf(stderr, " %-7s 0 - whisper\n", ""); + fprintf(stderr, " %-7s 1 - memcpy\n", ""); + fprintf(stderr, " %-7s 2 - ggml_mul_mat\n", ""); + fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true"); + fprintf(stderr, " -fa, --flash-attn [%-7s] enable flash attention\n", params.flash_attn ? "true" : "false"); fprintf(stderr, "\n"); } @@ -65,12 +77,27 @@ static int whisper_bench_full(const whisper_params & params) { // the model is initialised in whisper_init_from_file_with_params // Failure to do this will result in attempts to query null devices #ifdef GGML_BACKEND_DL - ggml_backend_load_all(); + // If params.device is "" then load all devices otherwise just load named + // device (and hope they got it right). 
Really should check against valid + // device names + if (params.device.empty()) { + ggml_backend_load_all(); + } else { + if(ggml_backend_load_best(params.device.c_str(), true, nullptr) == nullptr) { + fprintf(stderr, "error: could not load device %s\n", params.device.c_str()); + return 5; + } + } #endif struct whisper_context_params cparams = whisper_context_default_params(); + #ifdef GGML_BACKEND_DL + // Always allow GPU if GGML_BACKEND_DL as it can be overriden or only choice + cparams.use_gpu = true; + #else cparams.use_gpu = params.use_gpu; + #endif cparams.flash_attn = params.flash_attn; struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams); From 0ebfeb5245097ae85cf8d56e4fa9ccd0302887d7 Mon Sep 17 00:00:00 2001 From: peardox Date: Thu, 17 Apr 2025 14:50:38 +0100 Subject: [PATCH 11/14] Remove ggml-backend.cpp/h alterations --- ggml/include/ggml-backend.h | 5 ----- ggml/src/ggml-backend.cpp | 18 ------------------ 2 files changed, 23 deletions(-) diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h index 859c5570315..e0073c8b641 100644 --- a/ggml/include/ggml-backend.h +++ b/ggml/include/ggml-backend.h @@ -349,11 +349,6 @@ extern "C" { GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size); GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void); - // Load Best exposed to allow loading of specific types of backend - // Notably this allows you to load only one specific backend ignoring all - // others (e.g. only load cuda - without cpu) - GGML_API ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path); - #ifdef __cplusplus } #endif diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index 2ccf269e235..273075f4e54 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -346,18 +346,7 @@ bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * return ggml_backend_dev_offload_op(backend->device, op); } -void print_error_no_device(void) { - fprintf(stderr, "You are attampting to use a null backend.\n"); - fprintf(stderr, "Please verify the backend is loaded before you try to use one\n"); - fprintf(stderr, "See bench.cpp / cli.cpp / stream.cpp for example\n"); -} - ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend) { - #ifdef GGML_BACKEND_DL - if (backend == nullptr) { - print_error_no_device(); - } - #endif return backend->device; } @@ -480,11 +469,6 @@ void ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_d } ggml_backend_reg_t ggml_backend_dev_backend_reg(ggml_backend_dev_t device) { - #ifdef GGML_BACKEND_DL - if (device == nullptr) { - print_error_no_device(); - } - #endif return device->reg; } @@ -1471,9 +1455,7 @@ ggml_backend_sched_t ggml_backend_sched_new( bool parallel) { GGML_ASSERT(n_backends > 0); GGML_ASSERT(n_backends <= GGML_SCHED_MAX_BACKENDS); -#ifndef GGML_BACKEND_DL // What's wrong with a GPU here ? 
GGML_ASSERT(ggml_backend_dev_type(ggml_backend_get_device(backends[n_backends - 1])) == GGML_BACKEND_DEVICE_TYPE_CPU); -#endif struct ggml_backend_sched * sched = (ggml_backend_sched *) calloc(1, sizeof(struct ggml_backend_sched)); From 4f310d1d943f4c28f42f70cc54998d94f244fe4a Mon Sep 17 00:00:00 2001 From: peardox Date: Fri, 18 Apr 2025 12:23:06 +0100 Subject: [PATCH 12/14] Temp state --- examples/bench/bench.cpp | 1 + examples/talk-llama/talk-llama.cpp | 24 +++++++++++++++++++++--- ggml/include/ggml-backend.h | 3 +++ 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/examples/bench/bench.cpp b/examples/bench/bench.cpp index 9dfac30dfd5..c9603ca0318 100644 --- a/examples/bench/bench.cpp +++ b/examples/bench/bench.cpp @@ -87,6 +87,7 @@ static int whisper_bench_full(const whisper_params & params) { fprintf(stderr, "error: could not load device %s\n", params.device.c_str()); return 5; } + ggml_backend_load_best("cpu", true, nullptr); } #endif diff --git a/examples/talk-llama/talk-llama.cpp b/examples/talk-llama/talk-llama.cpp index 9097c491b61..44da2d5ee9c 100644 --- a/examples/talk-llama/talk-llama.cpp +++ b/examples/talk-llama/talk-llama.cpp @@ -7,11 +7,13 @@ #include "whisper.h" #include "llama.h" +#include +#include +#include #include #include #include #include -#include #include #include #include @@ -35,6 +37,15 @@ static std::vector llama_tokenize(struct llama_context * ctx, const return result; } +static std::string llama_time_now(void) { + auto t = std::time(nullptr); + auto tm = *std::localtime(&t); + + std::ostringstream oss; + oss << std::put_time(&tm, "%H:%M:%S"); + return oss.str(); +} + static std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) { const llama_model * model = llama_get_model(ctx); const llama_vocab * vocab = llama_model_get_vocab(model); @@ -285,6 +296,13 @@ int main(int argc, char ** argv) { exit(0); } + // If we're using a GGML_BACKEND_DL build we need to load backends before + // the model is initialised in whisper_init_from_file_with_params + // Failure to do this will result in attempts to query null devices + #ifdef GGML_BACKEND_DL + ggml_backend_load_all(); + #endif + // whisper init struct whisper_context_params cparams = whisper_context_default_params(); @@ -524,7 +542,7 @@ int main(int argc, char ** argv) { } printf("\n"); - printf("%s%s", params.person.c_str(), chat_symb.c_str()); + printf("%s[%s]%s", params.person.c_str(), llama_time_now().c_str(), chat_symb.c_str()); fflush(stdout); // clear audio buffer @@ -636,7 +654,7 @@ int main(int argc, char ** argv) { force_speak = false; text_heard.insert(0, 1, ' '); - text_heard += "\n" + params.bot_name + chat_symb; + text_heard += "\n" + params.bot_name + "[" + llama_time_now().c_str() + "]" + chat_symb; fprintf(stdout, "%s%s%s", "\033[1m", text_heard.c_str(), "\033[0m"); fflush(stdout); diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h index e0073c8b641..2a2e739554f 100644 --- a/ggml/include/ggml-backend.h +++ b/ggml/include/ggml-backend.h @@ -349,6 +349,9 @@ extern "C" { GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size); GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void); + // Temp fix now + GGML_API ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path); + #ifdef __cplusplus } #endif From 1d7a9c44183fe2e5aeeca99697c251cb7f087574 Mon Sep 17 00:00:00 2001 From: peardox Date: Fri, 18 Apr 2025 17:48:29 +0100 Subject: [PATCH 13/14] 
Tidy up and play nice --- examples/bench/bench.cpp | 30 +----------------------------- examples/cli/cli.cpp | 2 +- examples/command/command.cpp | 7 +++++++ examples/server/server.cpp | 8 ++++++++ examples/stream/stream.cpp | 2 +- examples/talk-llama/talk-llama.cpp | 19 ++++--------------- ggml/include/ggml-backend.h | 2 +- include/whisper.h | 5 +++++ src/whisper.cpp | 6 ++++++ 9 files changed, 34 insertions(+), 47 deletions(-) diff --git a/examples/bench/bench.cpp b/examples/bench/bench.cpp index c9603ca0318..3f85950e17e 100644 --- a/examples/bench/bench.cpp +++ b/examples/bench/bench.cpp @@ -11,9 +11,6 @@ struct whisper_params { int32_t what = 0; // what to benchmark: 0 - whisper encoder, 1 - memcpy, 2 - ggml_mul_mat std::string model = "models/ggml-base.en.bin"; - #ifdef GGML_BACKEND_DL - std::string device = ""; - #endif bool use_gpu = true; bool flash_attn = false; @@ -31,9 +28,6 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params } else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); } else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; } - #ifdef GGML_BACKEND_DL - else if (arg == "-d" || arg == "--device") { params.device = argv[++i]; } - #endif else if (arg == "-w" || arg == "--what") { params.what = atoi(argv[++i]); } else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; } else if (arg == "-fa" || arg == "--flash-attn") { params.flash_attn = true; } @@ -55,12 +49,6 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para fprintf(stderr, " -h, --help [default] show this help message and exit\n"); fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads); fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str()); - #ifdef GGML_BACKEND_DL - fprintf(stderr, " -d DEVICE, --device DEVICE [%-7s] device type\n" , params.device.c_str()); - fprintf(stderr, " valid devices : blas, cann, cpu, cuda, hip, kompute,\n"); - fprintf(stderr, " musa, opencl, rpc, sycl and vulkan\n"); - fprintf(stderr, " Optional libraries must be supplied\n"); - #endif fprintf(stderr, " -w N, --what N [%-7d] what to benchmark:\n", params.what); fprintf(stderr, " %-7s 0 - whisper\n", ""); fprintf(stderr, " %-7s 1 - memcpy\n", ""); @@ -77,28 +65,12 @@ static int whisper_bench_full(const whisper_params & params) { // the model is initialised in whisper_init_from_file_with_params // Failure to do this will result in attempts to query null devices #ifdef GGML_BACKEND_DL - // If params.device is "" then load all devices otherwise just load named - // device (and hope they got it right). 
Really should check against valid - // device names - if (params.device.empty()) { - ggml_backend_load_all(); - } else { - if(ggml_backend_load_best(params.device.c_str(), true, nullptr) == nullptr) { - fprintf(stderr, "error: could not load device %s\n", params.device.c_str()); - return 5; - } - ggml_backend_load_best("cpu", true, nullptr); - } + whisper_backend_load_all(); #endif struct whisper_context_params cparams = whisper_context_default_params(); - #ifdef GGML_BACKEND_DL - // Always allow GPU if GGML_BACKEND_DL as it can be overriden or only choice - cparams.use_gpu = true; - #else cparams.use_gpu = params.use_gpu; - #endif cparams.flash_attn = params.flash_attn; struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams); diff --git a/examples/cli/cli.cpp b/examples/cli/cli.cpp index 1e000643ddd..100407d7a86 100644 --- a/examples/cli/cli.cpp +++ b/examples/cli/cli.cpp @@ -1006,7 +1006,7 @@ int main(int argc, char ** argv) { // the model is initialised in whisper_init_from_file_with_params // Failure to do this will result in attempts to query null devices #ifdef GGML_BACKEND_DL - ggml_backend_load_all(); + whisper_backend_load_all(); #endif // whisper init diff --git a/examples/command/command.cpp b/examples/command/command.cpp index 9dc8f629995..4fd27d8d523 100644 --- a/examples/command/command.cpp +++ b/examples/command/command.cpp @@ -690,6 +690,13 @@ int main(int argc, char ** argv) { exit(0); } + // If we're using a GGML_BACKEND_DL build we need to load backends before + // the model is initialised in whisper_init_from_file_with_params + // Failure to do this will result in attempts to query null devices + #ifdef GGML_BACKEND_DL + whisper_backend_load_all(); + #endif + // whisper init struct whisper_context_params cparams = whisper_context_default_params(); diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 38da61673df..b21dd81cab3 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -543,6 +543,14 @@ int main(int argc, char ** argv) { if (sparams.ffmpeg_converter) { check_ffmpeg_availibility(); } + + // If we're using a GGML_BACKEND_DL build we need to load backends before + // the model is initialised in whisper_init_from_file_with_params + // Failure to do this will result in attempts to query null devices + #ifdef GGML_BACKEND_DL + whisper_backend_load_all(); + #endif + // whisper init struct whisper_context_params cparams = whisper_context_default_params(); diff --git a/examples/stream/stream.cpp b/examples/stream/stream.cpp index ce192b472d9..d28a320423b 100644 --- a/examples/stream/stream.cpp +++ b/examples/stream/stream.cpp @@ -159,7 +159,7 @@ int main(int argc, char ** argv) { // the model is initialised in whisper_init_from_file_with_params // Failure to do this will result in attempts to query null devices #ifdef GGML_BACKEND_DL - ggml_backend_load_all(); + whisper_backend_load_all(); #endif struct whisper_context_params cparams = whisper_context_default_params(); diff --git a/examples/talk-llama/talk-llama.cpp b/examples/talk-llama/talk-llama.cpp index 44da2d5ee9c..eedb10a9c6d 100644 --- a/examples/talk-llama/talk-llama.cpp +++ b/examples/talk-llama/talk-llama.cpp @@ -7,13 +7,11 @@ #include "whisper.h" #include "llama.h" -#include -#include -#include #include #include #include #include +#include #include #include #include @@ -37,15 +35,6 @@ static std::vector llama_tokenize(struct llama_context * ctx, const return result; } -static std::string llama_time_now(void) { - auto t = 
std::time(nullptr); - auto tm = *std::localtime(&t); - - std::ostringstream oss; - oss << std::put_time(&tm, "%H:%M:%S"); - return oss.str(); -} - static std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) { const llama_model * model = llama_get_model(ctx); const llama_vocab * vocab = llama_model_get_vocab(model); @@ -300,7 +289,7 @@ int main(int argc, char ** argv) { // the model is initialised in whisper_init_from_file_with_params // Failure to do this will result in attempts to query null devices #ifdef GGML_BACKEND_DL - ggml_backend_load_all(); + whisper_backend_load_all(); #endif // whisper init @@ -542,7 +531,7 @@ int main(int argc, char ** argv) { } printf("\n"); - printf("%s[%s]%s", params.person.c_str(), llama_time_now().c_str(), chat_symb.c_str()); + printf("%s%s", params.person.c_str(), chat_symb.c_str()); fflush(stdout); // clear audio buffer @@ -654,7 +643,7 @@ int main(int argc, char ** argv) { force_speak = false; text_heard.insert(0, 1, ' '); - text_heard += "\n" + params.bot_name + "[" + llama_time_now().c_str() + "]" + chat_symb; + text_heard += "\n" + params.bot_name + chat_symb; fprintf(stdout, "%s%s%s", "\033[1m", text_heard.c_str(), "\033[0m"); fflush(stdout); diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h index 2a2e739554f..0a035a90a84 100644 --- a/ggml/include/ggml-backend.h +++ b/ggml/include/ggml-backend.h @@ -349,7 +349,7 @@ extern "C" { GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size); GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void); - // Temp fix now + // Expose ggml_backend_load_best for external use GGML_API ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path); #ifdef __cplusplus diff --git a/include/whisper.h b/include/whisper.h index 1e1375033ad..f250e14d1a4 100644 --- a/include/whisper.h +++ b/include/whisper.h @@ -668,6 +668,11 @@ extern "C" { // Get the no_speech probability for the specified segment WHISPER_API float whisper_full_get_segment_no_speech_prob (struct whisper_context * ctx, int i_segment); WHISPER_API float whisper_full_get_segment_no_speech_prob_from_state(struct whisper_state * state, int i_segment); + + #ifdef GGML_BACKEND_DL + WHISPER_API void whisper_backend_load_all(void); + #endif + #ifdef __cplusplus } #endif diff --git a/src/whisper.cpp b/src/whisper.cpp index cf09084a5c6..6ed5bdbc8e2 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -7545,3 +7545,9 @@ static void whisper_log_callback_default(ggml_log_level level, const char * text fputs(text, stderr); fflush(stderr); } + +#ifdef GGML_BACKEND_DL +static void whisper_backend_load_all(void) { + ggml_backend_load_all(); +} +#endif From 18edf34ebd96c2626b4eb183829603b02e7a90aa Mon Sep 17 00:00:00 2001 From: peardox Date: Fri, 18 Apr 2025 19:22:49 +0100 Subject: [PATCH 14/14] Tidy up and play nice --- ggml/src/ggml-backend-reg.cpp | 2 +- src/whisper.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp index 405d8e31514..d1ac9899a38 100644 --- a/ggml/src/ggml-backend-reg.cpp +++ b/ggml/src/ggml-backend-reg.cpp @@ -485,7 +485,7 @@ static fs::path backend_filename_extension() { #endif } -static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) { +ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) { // enumerate all the files 
that match [lib]ggml-name-*.[so|dll] in the search paths const fs::path name_path = fs::u8path(name); const fs::path file_prefix = backend_filename_prefix().native() + name_path.native() + fs::u8path("-").native(); diff --git a/src/whisper.cpp b/src/whisper.cpp index 6ed5bdbc8e2..e1b7016052b 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -7547,7 +7547,7 @@ static void whisper_log_callback_default(ggml_log_level level, const char * text } #ifdef GGML_BACKEND_DL -static void whisper_backend_load_all(void) { +void whisper_backend_load_all(void) { ggml_backend_load_all(); } #endif
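Taken together, the series makes a GGML_BACKEND_DL application responsible for loading backends itself before the first whisper context is created, either all at once via the new whisper_backend_load_all() wrapper or selectively via the now-exported ggml_backend_load_best(). A minimal usage sketch follows, assuming the API as it stands after PATCH 14/14; the model path and the "cuda" backend name are illustrative only, and the selective branch is shown commented out as an alternative:

    // sketch: backend loading in a GGML_BACKEND_DL build, before whisper init
    #include "whisper.h"
    #include "ggml-backend.h"
    #include <cstdio>

    int main() {
    #ifdef GGML_BACKEND_DL
        // Option A: load every backend found on the search path.
        whisper_backend_load_all();

        // Option B (alternative): load one specific backend plus the CPU fallback,
        // e.g. CUDA only. "cuda" is an example name, not a guaranteed target.
        // if (ggml_backend_load_best("cuda", /*silent=*/true, /*user_search_path=*/nullptr) == nullptr) {
        //     fprintf(stderr, "error: could not load the cuda backend\n");
        //     return 1;
        // }
        // ggml_backend_load_best("cpu", true, nullptr);
    #endif

        // Backends must be loaded before this call, otherwise device queries hit null devices.
        struct whisper_context_params cparams = whisper_context_default_params();
        struct whisper_context * ctx = whisper_init_from_file_with_params("models/ggml-base.en.bin", cparams);
        if (ctx == nullptr) {
            fprintf(stderr, "failed to initialize whisper context\n");
            return 1;
        }

        // ... run transcription as usual ...

        whisper_free(ctx);
        return 0;
    }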