25 changes: 16 additions & 9 deletions examples/bench/bench.cpp
@@ -46,21 +46,28 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
fprintf(stderr, "usage: %s [options]\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, "options:\n");
fprintf(stderr, " -h, --help [default] show this help message and exit\n");
fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
fprintf(stderr, " -w N, --what N [%-7d] what to benchmark:\n", params.what);
fprintf(stderr, " %-7s 0 - whisper\n", "");
fprintf(stderr, " %-7s 1 - memcpy\n", "");
fprintf(stderr, " %-7s 2 - ggml_mul_mat\n", "");
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
fprintf(stderr, " -fa, --flash-attn [%-7s] enable flash attention\n", params.flash_attn ? "true" : "false");
fprintf(stderr, " -h, --help [default] show this help message and exit\n");
fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
fprintf(stderr, " -w N, --what N [%-7d] what to benchmark:\n", params.what);
fprintf(stderr, " %-7s 0 - whisper\n", "");
fprintf(stderr, " %-7s 1 - memcpy\n", "");
fprintf(stderr, " %-7s 2 - ggml_mul_mat\n", "");
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
fprintf(stderr, " -fa, --flash-attn [%-7s] enable flash attention\n", params.flash_attn ? "true" : "false");
fprintf(stderr, "\n");
}

static int whisper_bench_full(const whisper_params & params) {
// whisper init

+    // If we're using a GGML_BACKEND_DL build, we need to load the backends
+    // before the model is initialised in whisper_init_from_file_with_params.
+    // Failing to do so results in attempts to query null devices.
+#ifdef GGML_BACKEND_DL
+    whisper_backend_load_all();
+#endif

struct whisper_context_params cparams = whisper_context_default_params();

cparams.use_gpu = params.use_gpu;
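The same guard recurs in every example touched below. For context, a minimal sketch of the call order an application would follow (the model filename is a placeholder):

```cpp
#include "whisper.h"

int main() {
#ifdef GGML_BACKEND_DL
    // dynamic builds must register the backends before any device queries
    whisper_backend_load_all();
#endif

    struct whisper_context_params cparams = whisper_context_default_params();
    cparams.use_gpu = true;

    // "ggml-base.en.bin" is a placeholder model path
    struct whisper_context * ctx = whisper_init_from_file_with_params("ggml-base.en.bin", cparams);
    if (ctx == nullptr) {
        return 1;
    }

    whisper_free(ctx);
    return 0;
}
```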
7 changes: 7 additions & 0 deletions examples/cli/cli.cpp
@@ -1002,6 +1002,13 @@ int main(int argc, char ** argv) {
whisper_log_set(cb_log_disable, NULL);
}

+    // If we're using a GGML_BACKEND_DL build, we need to load the backends
+    // before the model is initialised in whisper_init_from_file_with_params.
+    // Failing to do so results in attempts to query null devices.
+#ifdef GGML_BACKEND_DL
+    whisper_backend_load_all();
+#endif

// whisper init

struct whisper_context_params cparams = whisper_context_default_params();
7 changes: 7 additions & 0 deletions examples/command/command.cpp
@@ -690,6 +690,13 @@ int main(int argc, char ** argv) {
exit(0);
}

+    // If we're using a GGML_BACKEND_DL build, we need to load the backends
+    // before the model is initialised in whisper_init_from_file_with_params.
+    // Failing to do so results in attempts to query null devices.
+#ifdef GGML_BACKEND_DL
+    whisper_backend_load_all();
+#endif

// whisper init

struct whisper_context_params cparams = whisper_context_default_params();
8 changes: 8 additions & 0 deletions examples/server/server.cpp
@@ -543,6 +543,14 @@ int main(int argc, char ** argv) {
if (sparams.ffmpeg_converter) {
check_ffmpeg_availibility();
}

+    // If we're using a GGML_BACKEND_DL build, we need to load the backends
+    // before the model is initialised in whisper_init_from_file_with_params.
+    // Failing to do so results in attempts to query null devices.
+#ifdef GGML_BACKEND_DL
+    whisper_backend_load_all();
+#endif

// whisper init
struct whisper_context_params cparams = whisper_context_default_params();

7 changes: 7 additions & 0 deletions examples/stream/stream.cpp
@@ -155,6 +155,13 @@ int main(int argc, char ** argv) {
exit(0);
}

+    // If we're using a GGML_BACKEND_DL build, we need to load the backends
+    // before the model is initialised in whisper_init_from_file_with_params.
+    // Failing to do so results in attempts to query null devices.
+#ifdef GGML_BACKEND_DL
+    whisper_backend_load_all();
+#endif

struct whisper_context_params cparams = whisper_context_default_params();

cparams.use_gpu = params.use_gpu;
7 changes: 7 additions & 0 deletions examples/talk-llama/talk-llama.cpp
@@ -285,6 +285,13 @@ int main(int argc, char ** argv) {
exit(0);
}

+    // If we're using a GGML_BACKEND_DL build, we need to load the backends
+    // before the model is initialised in whisper_init_from_file_with_params.
+    // Failing to do so results in attempts to query null devices.
+#ifdef GGML_BACKEND_DL
+    whisper_backend_load_all();
+#endif

// whisper init

struct whisper_context_params cparams = whisper_context_default_params();
3 changes: 3 additions & 0 deletions ggml/include/ggml-backend.h
@@ -348,6 +348,9 @@ extern "C" {
// CPU buffer types are always available
GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);

+    // Expose ggml_backend_load_best for external use
+    GGML_API ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path);

#ifdef __cplusplus
}
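With ggml_backend_load_best exposed, a caller can load one named backend rather than all of them. A minimal sketch, relying only on the signature above and on ggml_backend_reg_name; "cpu" is assumed to match a [lib]ggml-cpu-*.[so|dll] file on the default search path (nullptr):

```cpp
#include "ggml-backend.h"

#include <cstdio>

int main() {
    // pick the best matching variant of the named backend from the default
    // search paths (nullptr); silent=false prints diagnostics on failure
    ggml_backend_reg_t reg = ggml_backend_load_best("cpu", /*silent =*/ false, nullptr);
    if (reg == nullptr) {
        fprintf(stderr, "no matching backend found\n");
        return 1;
    }
    printf("loaded backend: %s\n", ggml_backend_reg_name(reg));
    return 0;
}
```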
2 changes: 1 addition & 1 deletion ggml/src/ggml-backend-reg.cpp
@@ -485,7 +485,7 @@ static fs::path backend_filename_extension() {
#endif
}

-static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
+ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
// enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
const fs::path name_path = fs::u8path(name);
const fs::path file_prefix = backend_filename_prefix().native() + name_path.native() + fs::u8path("-").native();
5 changes: 5 additions & 0 deletions include/whisper.h
@@ -668,6 +668,11 @@ extern "C" {
// Get the no_speech probability for the specified segment
WHISPER_API float whisper_full_get_segment_no_speech_prob (struct whisper_context * ctx, int i_segment);
WHISPER_API float whisper_full_get_segment_no_speech_prob_from_state(struct whisper_state * state, int i_segment);

+#ifdef GGML_BACKEND_DL
+    WHISPER_API void whisper_backend_load_all(void);
+#endif

#ifdef __cplusplus
}
#endif
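Since the declaration itself is guarded by GGML_BACKEND_DL, application code that must build against both static and dynamic ggml needs the same guard around the call; a minimal sketch (the #else comment reflects an assumption that static builds register their backends at link time):

```cpp
#ifdef GGML_BACKEND_DL
    whisper_backend_load_all(); // dynamic build: load the backend shared libraries
#else
    // static build: backends are compiled in, so there is nothing to load here
#endif
```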
31 changes: 16 additions & 15 deletions src/whisper.cpp
@@ -208,15 +208,6 @@ static bool ggml_graph_compute_helper(
return t;
}

-static void whisper_load_backends() {
-#ifdef GGML_BACKEND_DL
-    static std::once_flag flag;
-    std::call_once(flag, []() {
-        ggml_backend_load_all();
-    });
-#endif
-}

// TODO: move these functions to ggml-base with support for ggml-backend?

static ggml_tensor * whisper_set_f32(struct ggml_tensor * t, float v) {
@@ -1313,8 +1304,6 @@ static size_t aheads_masks_nbytes(struct whisper_aheads_masks & aheads_masks) {
static ggml_backend_t whisper_backend_init_gpu(const whisper_context_params & params) {
ggml_log_set(g_state.log_callback, g_state.log_callback_user_data);

-    whisper_load_backends();

ggml_backend_dev_t dev = nullptr;

int cnt = 0;
@@ -1372,6 +1361,10 @@ static std::vector<ggml_backend_t> whisper_backend_init(const whisper_context_params & params) {

ggml_backend_t backend_cpu = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr);
if (backend_cpu == nullptr) {
+#ifdef GGML_BACKEND_DL
+        // in a dynamic-backend build the CPU backend may not have been loaded yet, so it can be null here
+        return result;
+#endif
throw std::runtime_error("failed to initialize CPU backend");
}
result.push_back(backend_cpu);
@@ -1407,6 +1400,12 @@ static buft_list_t make_buft_list(whisper_context_params & params) {

// CPU Extra
auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
+#ifdef GGML_BACKEND_DL
+    // in a dynamic-backend build the CPU device may not have been loaded yet, so it can be null here
+    if (cpu_dev == nullptr) {
+        return buft_list;
+    }
+#endif
auto * cpu_reg = ggml_backend_dev_backend_reg(cpu_dev);
auto get_extra_bufts_fn = (ggml_backend_dev_get_extra_bufts_t)
ggml_backend_reg_get_proc_address(cpu_reg, "ggml_backend_dev_get_extra_bufts");
@@ -4321,8 +4320,6 @@ static int whisper_has_openvino(void) {
const char * whisper_print_system_info(void) {
static std::string s;

-    whisper_load_backends();

s = "";
s += "WHISPER : ";
s += "COREML = " + std::to_string(whisper_has_coreml()) + " | ";
@@ -6776,8 +6773,6 @@ WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads) {
}

WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) {
-    whisper_load_backends();

static std::string s;
s = "";
char strbuf[256];
@@ -7550,3 +7545,9 @@ static void whisper_log_callback_default(ggml_log_level level, const char * text, void * user_data) {
fputs(text, stderr);
fflush(stderr);
}

+#ifdef GGML_BACKEND_DL
+void whisper_backend_load_all(void) {
+    ggml_backend_load_all();
+}
+#endif
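One behavioural difference worth noting: the removed internal helper wrapped the load in std::call_once, while the new public wrapper forwards straight to ggml_backend_load_all. An application that can reach the call from several paths may want to restore the once-only behaviour itself; a minimal sketch mirroring the removed helper:

```cpp
#include <mutex>

#include "whisper.h"

static void load_backends_once() {
#ifdef GGML_BACKEND_DL
    // ensure the backend shared libraries are loaded exactly once,
    // no matter how many code paths call this helper
    static std::once_flag flag;
    std::call_once(flag, []() {
        whisper_backend_load_all();
    });
#endif
}
```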