From 3d3493ee3d8b479dd33804686d087e03b8343fa0 Mon Sep 17 00:00:00 2001 From: peardox Date: Wed, 16 Apr 2025 18:41:18 +0100 Subject: [PATCH 01/14] Expose ggml_backend_load_best --- ggml/include/ggml-backend.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h index 64671495b38..0d9ebe88ce1 100644 --- a/ggml/include/ggml-backend.h +++ b/ggml/include/ggml-backend.h @@ -348,7 +348,12 @@ extern "C" { // CPU buffer types are always available GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size); GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void); - + + // Load Best exposed to allow loading of specific types of backend + // Notably this allows you to load only one specific backend ignoring all + // others (e.g. only load cuda - without cpu) + GGML_API ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) + #ifdef __cplusplus } #endif From 81965b51e4863fd7c2461512509a9c22d55cac33 Mon Sep 17 00:00:00 2001 From: peardox Date: Wed, 16 Apr 2025 19:06:11 +0100 Subject: [PATCH 02/14] Fix header ggml\include\ggml_backend.h --- ggml/include/ggml-backend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h index 0d9ebe88ce1..859c5570315 100644 --- a/ggml/include/ggml-backend.h +++ b/ggml/include/ggml-backend.h @@ -352,7 +352,7 @@ extern "C" { // Load Best exposed to allow loading of specific types of backend // Notably this allows you to load only one specific backend ignoring all // others (e.g. only load cuda - without cpu) - GGML_API ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) + GGML_API ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path); #ifdef __cplusplus } From bee63b81425df46715a78698f03f2cbc9b1009f2 Mon Sep 17 00:00:00 2001 From: peardox Date: Wed, 16 Apr 2025 19:18:10 +0100 Subject: [PATCH 03/14] if GGML_BACKEND_DL defined don't use whisper_load_backends in whisper.cpp --- src/whisper.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/whisper.cpp b/src/whisper.cpp index 2c83f7bab3b..c4c076a8b99 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -1313,8 +1313,10 @@ static size_t aheads_masks_nbytes(struct whisper_aheads_masks & aheads_masks) { static ggml_backend_t whisper_backend_init_gpu(const whisper_context_params & params) { ggml_log_set(g_state.log_callback, g_state.log_callback_user_data); + #ifndef GGML_BACKEND_DL whisper_load_backends(); - + #eneif + ggml_backend_dev_t dev = nullptr; int cnt = 0; @@ -4321,7 +4323,9 @@ static int whisper_has_openvino(void) { const char * whisper_print_system_info(void) { static std::string s; + #ifndef GGML_BACKEND_DL whisper_load_backends(); + #eneif s = ""; s += "WHISPER : "; @@ -6776,7 +6780,9 @@ WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads) { } WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) { + #ifndef GGML_BACKEND_DL whisper_load_backends(); + #eneif static std::string s; s = ""; From cc265ab4786b3896a29156241b982e801c2e8d94 Mon Sep 17 00:00:00 2001 From: peardox Date: Wed, 16 Apr 2025 19:29:32 +0100 Subject: [PATCH 04/14] Update examples bench, cli and stream to cater for alternate backend load --- examples/bench/bench.cpp | 7 +++++++ examples/cli/cli.cpp | 7 +++++++ examples/stream/stream.cpp | 7 +++++++ 3 files 
changed, 21 insertions(+) diff --git a/examples/bench/bench.cpp b/examples/bench/bench.cpp index 54f73110d42..1cd75067415 100644 --- a/examples/bench/bench.cpp +++ b/examples/bench/bench.cpp @@ -61,6 +61,13 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para static int whisper_bench_full(const whisper_params & params) { // whisper init + // If we're using a GGML_BACKEND_DL build we need to load backends before + // the model is initialised in whisper_init_from_file_with_params + // Failure to do this will result in attempts to query null devices + #ifdef GGML_BACKEND_DL + ggml_backend_load_all(); + #eneif + struct whisper_context_params cparams = whisper_context_default_params(); cparams.use_gpu = params.use_gpu; diff --git a/examples/cli/cli.cpp b/examples/cli/cli.cpp index 4b2b3521b80..a8dfcc1d12c 100644 --- a/examples/cli/cli.cpp +++ b/examples/cli/cli.cpp @@ -1002,6 +1002,13 @@ int main(int argc, char ** argv) { whisper_log_set(cb_log_disable, NULL); } + // If we're using a GGML_BACKEND_DL build we need to load backends before + // the model is initialised in whisper_init_from_file_with_params + // Failure to do this will result in attempts to query null devices + #ifdef GGML_BACKEND_DL + ggml_backend_load_all(); + #eneif + // whisper init struct whisper_context_params cparams = whisper_context_default_params(); diff --git a/examples/stream/stream.cpp b/examples/stream/stream.cpp index 65c6587db92..17048647cbb 100644 --- a/examples/stream/stream.cpp +++ b/examples/stream/stream.cpp @@ -155,6 +155,13 @@ int main(int argc, char ** argv) { exit(0); } + // If we're using a GGML_BACKEND_DL build we need to load backends before + // the model is initialised in whisper_init_from_file_with_params + // Failure to do this will result in attempts to query null devices + #ifdef GGML_BACKEND_DL + ggml_backend_load_all(); + #eneif + struct whisper_context_params cparams = whisper_context_default_params(); cparams.use_gpu = params.use_gpu; From b96679c5e2a023f0694705439856cf4d3f6d6896 Mon Sep 17 00:00:00 2001 From: peardox Date: Wed, 16 Apr 2025 19:41:56 +0100 Subject: [PATCH 05/14] Fix dumb type eneif -> endif --- examples/bench/bench.cpp | 2 +- examples/cli/cli.cpp | 2 +- examples/stream/stream.cpp | 2 +- src/whisper.cpp | 10 +++++----- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/bench/bench.cpp b/examples/bench/bench.cpp index 1cd75067415..33a864ee84a 100644 --- a/examples/bench/bench.cpp +++ b/examples/bench/bench.cpp @@ -66,7 +66,7 @@ static int whisper_bench_full(const whisper_params & params) { // Failure to do this will result in attempts to query null devices #ifdef GGML_BACKEND_DL ggml_backend_load_all(); - #eneif + #endif struct whisper_context_params cparams = whisper_context_default_params(); diff --git a/examples/cli/cli.cpp b/examples/cli/cli.cpp index a8dfcc1d12c..1e000643ddd 100644 --- a/examples/cli/cli.cpp +++ b/examples/cli/cli.cpp @@ -1007,7 +1007,7 @@ int main(int argc, char ** argv) { // Failure to do this will result in attempts to query null devices #ifdef GGML_BACKEND_DL ggml_backend_load_all(); - #eneif + #endif // whisper init diff --git a/examples/stream/stream.cpp b/examples/stream/stream.cpp index 17048647cbb..ce192b472d9 100644 --- a/examples/stream/stream.cpp +++ b/examples/stream/stream.cpp @@ -160,7 +160,7 @@ int main(int argc, char ** argv) { // Failure to do this will result in attempts to query null devices #ifdef GGML_BACKEND_DL ggml_backend_load_all(); - #eneif + #endif struct whisper_context_params 
cparams = whisper_context_default_params(); diff --git a/src/whisper.cpp b/src/whisper.cpp index c4c076a8b99..899cafecc4d 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -209,12 +209,12 @@ static bool ggml_graph_compute_helper( } static void whisper_load_backends() { -#ifdef GGML_BACKEND_DL + #ifndef GGML_BACKEND_DL static std::once_flag flag; std::call_once(flag, []() { ggml_backend_load_all(); }); -#endif + #endif } // TODO: move these functions to ggml-base with support for ggml-backend? @@ -1315,7 +1315,7 @@ static ggml_backend_t whisper_backend_init_gpu(const whisper_context_params & pa #ifndef GGML_BACKEND_DL whisper_load_backends(); - #eneif + #endif ggml_backend_dev_t dev = nullptr; @@ -4325,7 +4325,7 @@ const char * whisper_print_system_info(void) { #ifndef GGML_BACKEND_DL whisper_load_backends(); - #eneif + #endif s = ""; s += "WHISPER : "; @@ -6782,7 +6782,7 @@ WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads) { WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) { #ifndef GGML_BACKEND_DL whisper_load_backends(); - #eneif + #endif static std::string s; s = ""; From 3a46af03c388ae173b8cce590e105ef9bc5c0854 Mon Sep 17 00:00:00 2001 From: peardox Date: Wed, 16 Apr 2025 20:38:48 +0100 Subject: [PATCH 06/14] Add print_error_no_device and trigger if null backend passed. There may be more cases, two identified and trapped --- ggml/src/ggml-backend.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index 273075f4e54..85cadc0baf9 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -346,7 +346,18 @@ bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * return ggml_backend_dev_offload_op(backend->device, op); } +void print_error_no_device(void) { + fprintf(stderr, "You are attampting to use a null backend.\n"); + fprintf(stderr, "Please verify the backend is loaded before you try to use one\n"); + fprintf(stderr, "See bench.cpp / cli.cpp / stream.cpp for example\n"); +} + ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend) { + #ifdef GGML_BACKEND_DL + if (backend == nullptr) { + print_error_no_device(); + } + #endif return backend->device; } @@ -469,6 +480,11 @@ void ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_d } ggml_backend_reg_t ggml_backend_dev_backend_reg(ggml_backend_dev_t device) { + #ifdef GGML_BACKEND_DL + if (device == nullptr) { + print_error_no_device(); + } + #endif return device->reg; } From 5fb957a0e53d585cd3fbf0d93ca609a786b11551 Mon Sep 17 00:00:00 2001 From: peardox Date: Thu, 17 Apr 2025 04:29:40 +0100 Subject: [PATCH 07/14] Remove all references to whisper_load_backends --- ggml/src/ggml-backend.cpp | 4 ++-- src/whisper.cpp | 21 --------------------- 2 files changed, 2 insertions(+), 23 deletions(-) diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index 85cadc0baf9..8bce5f122af 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -353,7 +353,7 @@ void print_error_no_device(void) { } ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend) { - #ifdef GGML_BACKEND_DL + #ifdef WHISPER_BACKEND_DL if (backend == nullptr) { print_error_no_device(); } @@ -480,7 +480,7 @@ void ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_d } ggml_backend_reg_t ggml_backend_dev_backend_reg(ggml_backend_dev_t device) { - #ifdef GGML_BACKEND_DL + #ifdef WHISPER_BACKEND_DL if (device == nullptr) { print_error_no_device(); } 
diff --git a/src/whisper.cpp b/src/whisper.cpp index 899cafecc4d..fc5de5cdbf8 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -208,15 +208,6 @@ static bool ggml_graph_compute_helper( return t; } -static void whisper_load_backends() { - #ifndef GGML_BACKEND_DL - static std::once_flag flag; - std::call_once(flag, []() { - ggml_backend_load_all(); - }); - #endif -} - // TODO: move these functions to ggml-base with support for ggml-backend? static ggml_tensor * whisper_set_f32(struct ggml_tensor * t, float v) { @@ -1313,10 +1304,6 @@ static size_t aheads_masks_nbytes(struct whisper_aheads_masks & aheads_masks) { static ggml_backend_t whisper_backend_init_gpu(const whisper_context_params & params) { ggml_log_set(g_state.log_callback, g_state.log_callback_user_data); - #ifndef GGML_BACKEND_DL - whisper_load_backends(); - #endif - ggml_backend_dev_t dev = nullptr; int cnt = 0; @@ -4323,10 +4310,6 @@ static int whisper_has_openvino(void) { const char * whisper_print_system_info(void) { static std::string s; - #ifndef GGML_BACKEND_DL - whisper_load_backends(); - #endif - s = ""; s += "WHISPER : "; s += "COREML = " + std::to_string(whisper_has_coreml()) + " | "; @@ -6780,10 +6763,6 @@ WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads) { } WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) { - #ifndef GGML_BACKEND_DL - whisper_load_backends(); - #endif - static std::string s; s = ""; char strbuf[256]; From 0146f9813df3f7af002d3e12ea9052477644a8e7 Mon Sep 17 00:00:00 2001 From: peardox Date: Thu, 17 Apr 2025 06:25:03 +0100 Subject: [PATCH 08/14] Enable ggml_backend_load_best and allow only one device to be requested --- ggml/src/ggml-backend.cpp | 2 ++ src/whisper.cpp | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index 8bce5f122af..20bf140a297 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -1471,7 +1471,9 @@ ggml_backend_sched_t ggml_backend_sched_new( bool parallel) { GGML_ASSERT(n_backends > 0); GGML_ASSERT(n_backends <= GGML_SCHED_MAX_BACKENDS); +#ifndef GGML_BACKEND_DL // What's wrong with a GPU here ? 
GGML_ASSERT(ggml_backend_dev_type(ggml_backend_get_device(backends[n_backends - 1])) == GGML_BACKEND_DEVICE_TYPE_CPU); +#endif struct ggml_backend_sched * sched = (ggml_backend_sched *) calloc(1, sizeof(struct ggml_backend_sched)); diff --git a/src/whisper.cpp b/src/whisper.cpp index fc5de5cdbf8..cf09084a5c6 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -1361,6 +1361,10 @@ static std::vector whisper_backend_init(const whisper_context_pa ggml_backend_t backend_cpu = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr); if (backend_cpu == nullptr) { + #ifdef GGML_BACKEND_DL + // If not using a load_all it is possible CPU is null + return result; + #endif throw std::runtime_error("failed to initialize CPU backend"); } result.push_back(backend_cpu); @@ -1396,6 +1400,12 @@ static buft_list_t make_buft_list(whisper_context_params & params) { // CPU Extra auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU); + #ifdef GGML_BACKEND_DL + // If not using a load_all it is possible CPU is null + if(cpu_dev == nullptr) { + return buft_list; + } + #endif auto * cpu_reg = ggml_backend_dev_backend_reg(cpu_dev); auto get_extra_bufts_fn = (ggml_backend_dev_get_extra_bufts_t) ggml_backend_reg_get_proc_address(cpu_reg, "ggml_backend_dev_get_extra_bufts"); From e1ba4123d7822f32cd424391c84c59fdf522dbce Mon Sep 17 00:00:00 2001 From: peardox Date: Thu, 17 Apr 2025 11:04:29 +0100 Subject: [PATCH 09/14] Rename WHISPER_BACKEND_DL -> GGML_BACKEND_DL --- ggml/src/ggml-backend.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index 20bf140a297..2ccf269e235 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -353,7 +353,7 @@ void print_error_no_device(void) { } ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend) { - #ifdef WHISPER_BACKEND_DL + #ifdef GGML_BACKEND_DL if (backend == nullptr) { print_error_no_device(); } @@ -480,7 +480,7 @@ void ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_d } ggml_backend_reg_t ggml_backend_dev_backend_reg(ggml_backend_dev_t device) { - #ifdef WHISPER_BACKEND_DL + #ifdef GGML_BACKEND_DL if (device == nullptr) { print_error_no_device(); } From 1fffca3bf84ebb6ef3a8d53f671d2740329e7b5c Mon Sep 17 00:00:00 2001 From: peardox Date: Thu, 17 Apr 2025 14:11:10 +0100 Subject: [PATCH 10/14] Modify bench.cpp to add -d/--device option for GGML_BACKEND_DL only --- examples/bench/bench.cpp | 47 +++++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/examples/bench/bench.cpp b/examples/bench/bench.cpp index 33a864ee84a..9dfac30dfd5 100644 --- a/examples/bench/bench.cpp +++ b/examples/bench/bench.cpp @@ -11,6 +11,9 @@ struct whisper_params { int32_t what = 0; // what to benchmark: 0 - whisper encoder, 1 - memcpy, 2 - ggml_mul_mat std::string model = "models/ggml-base.en.bin"; + #ifdef GGML_BACKEND_DL + std::string device = ""; + #endif bool use_gpu = true; bool flash_attn = false; @@ -28,6 +31,9 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params } else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); } else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; } + #ifdef GGML_BACKEND_DL + else if (arg == "-d" || arg == "--device") { params.device = argv[++i]; } + #endif else if (arg == "-w" || arg == "--what") { params.what = atoi(argv[++i]); } else if (arg == "-ng" || arg == "--no-gpu") { 
params.use_gpu = false; } else if (arg == "-fa" || arg == "--flash-attn") { params.flash_attn = true; } @@ -46,15 +52,21 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para fprintf(stderr, "usage: %s [options]\n", argv[0]); fprintf(stderr, "\n"); fprintf(stderr, "options:\n"); - fprintf(stderr, " -h, --help [default] show this help message and exit\n"); - fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads); - fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str()); - fprintf(stderr, " -w N, --what N [%-7d] what to benchmark:\n", params.what); - fprintf(stderr, " %-7s 0 - whisper\n", ""); - fprintf(stderr, " %-7s 1 - memcpy\n", ""); - fprintf(stderr, " %-7s 2 - ggml_mul_mat\n", ""); - fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true"); - fprintf(stderr, " -fa, --flash-attn [%-7s] enable flash attention\n", params.flash_attn ? "true" : "false"); + fprintf(stderr, " -h, --help [default] show this help message and exit\n"); + fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads); + fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str()); + #ifdef GGML_BACKEND_DL + fprintf(stderr, " -d DEVICE, --device DEVICE [%-7s] device type\n" , params.device.c_str()); + fprintf(stderr, " valid devices : blas, cann, cpu, cuda, hip, kompute,\n"); + fprintf(stderr, " musa, opencl, rpc, sycl and vulkan\n"); + fprintf(stderr, " Optional libraries must be supplied\n"); + #endif + fprintf(stderr, " -w N, --what N [%-7d] what to benchmark:\n", params.what); + fprintf(stderr, " %-7s 0 - whisper\n", ""); + fprintf(stderr, " %-7s 1 - memcpy\n", ""); + fprintf(stderr, " %-7s 2 - ggml_mul_mat\n", ""); + fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true"); + fprintf(stderr, " -fa, --flash-attn [%-7s] enable flash attention\n", params.flash_attn ? "true" : "false"); fprintf(stderr, "\n"); } @@ -65,12 +77,27 @@ static int whisper_bench_full(const whisper_params & params) { // the model is initialised in whisper_init_from_file_with_params // Failure to do this will result in attempts to query null devices #ifdef GGML_BACKEND_DL - ggml_backend_load_all(); + // If params.device is "" then load all devices otherwise just load named + // device (and hope they got it right). 
Really should check against valid + // device names + if (params.device.empty()) { + ggml_backend_load_all(); + } else { + if(ggml_backend_load_best(params.device.c_str(), true, nullptr) == nullptr) { + fprintf(stderr, "error: could not load device %s\n", params.device.c_str()); + return 5; + } + } #endif struct whisper_context_params cparams = whisper_context_default_params(); + #ifdef GGML_BACKEND_DL + // Always allow GPU if GGML_BACKEND_DL as it can be overriden or only choice + cparams.use_gpu = true; + #else cparams.use_gpu = params.use_gpu; + #endif cparams.flash_attn = params.flash_attn; struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams); From 0ebfeb5245097ae85cf8d56e4fa9ccd0302887d7 Mon Sep 17 00:00:00 2001 From: peardox Date: Thu, 17 Apr 2025 14:50:38 +0100 Subject: [PATCH 11/14] Remove ggml-backend.cpp/h alterations --- ggml/include/ggml-backend.h | 5 ----- ggml/src/ggml-backend.cpp | 18 ------------------ 2 files changed, 23 deletions(-) diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h index 859c5570315..e0073c8b641 100644 --- a/ggml/include/ggml-backend.h +++ b/ggml/include/ggml-backend.h @@ -349,11 +349,6 @@ extern "C" { GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size); GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void); - // Load Best exposed to allow loading of specific types of backend - // Notably this allows you to load only one specific backend ignoring all - // others (e.g. only load cuda - without cpu) - GGML_API ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path); - #ifdef __cplusplus } #endif diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index 2ccf269e235..273075f4e54 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -346,18 +346,7 @@ bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * return ggml_backend_dev_offload_op(backend->device, op); } -void print_error_no_device(void) { - fprintf(stderr, "You are attampting to use a null backend.\n"); - fprintf(stderr, "Please verify the backend is loaded before you try to use one\n"); - fprintf(stderr, "See bench.cpp / cli.cpp / stream.cpp for example\n"); -} - ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend) { - #ifdef GGML_BACKEND_DL - if (backend == nullptr) { - print_error_no_device(); - } - #endif return backend->device; } @@ -480,11 +469,6 @@ void ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_d } ggml_backend_reg_t ggml_backend_dev_backend_reg(ggml_backend_dev_t device) { - #ifdef GGML_BACKEND_DL - if (device == nullptr) { - print_error_no_device(); - } - #endif return device->reg; } @@ -1471,9 +1455,7 @@ ggml_backend_sched_t ggml_backend_sched_new( bool parallel) { GGML_ASSERT(n_backends > 0); GGML_ASSERT(n_backends <= GGML_SCHED_MAX_BACKENDS); -#ifndef GGML_BACKEND_DL // What's wrong with a GPU here ? 
GGML_ASSERT(ggml_backend_dev_type(ggml_backend_get_device(backends[n_backends - 1])) == GGML_BACKEND_DEVICE_TYPE_CPU); -#endif struct ggml_backend_sched * sched = (ggml_backend_sched *) calloc(1, sizeof(struct ggml_backend_sched)); From 4f310d1d943f4c28f42f70cc54998d94f244fe4a Mon Sep 17 00:00:00 2001 From: peardox Date: Fri, 18 Apr 2025 12:23:06 +0100 Subject: [PATCH 12/14] Temp state --- examples/bench/bench.cpp | 1 + examples/talk-llama/talk-llama.cpp | 24 +++++++++++++++++++++--- ggml/include/ggml-backend.h | 3 +++ 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/examples/bench/bench.cpp b/examples/bench/bench.cpp index 9dfac30dfd5..c9603ca0318 100644 --- a/examples/bench/bench.cpp +++ b/examples/bench/bench.cpp @@ -87,6 +87,7 @@ static int whisper_bench_full(const whisper_params & params) { fprintf(stderr, "error: could not load device %s\n", params.device.c_str()); return 5; } + ggml_backend_load_best("cpu", true, nullptr); } #endif diff --git a/examples/talk-llama/talk-llama.cpp b/examples/talk-llama/talk-llama.cpp index 9097c491b61..44da2d5ee9c 100644 --- a/examples/talk-llama/talk-llama.cpp +++ b/examples/talk-llama/talk-llama.cpp @@ -7,11 +7,13 @@ #include "whisper.h" #include "llama.h" +#include +#include +#include #include #include #include #include -#include #include #include #include @@ -35,6 +37,15 @@ static std::vector llama_tokenize(struct llama_context * ctx, const return result; } +static std::string llama_time_now(void) { + auto t = std::time(nullptr); + auto tm = *std::localtime(&t); + + std::ostringstream oss; + oss << std::put_time(&tm, "%H:%M:%S"); + return oss.str(); +} + static std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) { const llama_model * model = llama_get_model(ctx); const llama_vocab * vocab = llama_model_get_vocab(model); @@ -285,6 +296,13 @@ int main(int argc, char ** argv) { exit(0); } + // If we're using a GGML_BACKEND_DL build we need to load backends before + // the model is initialised in whisper_init_from_file_with_params + // Failure to do this will result in attempts to query null devices + #ifdef GGML_BACKEND_DL + ggml_backend_load_all(); + #endif + // whisper init struct whisper_context_params cparams = whisper_context_default_params(); @@ -524,7 +542,7 @@ int main(int argc, char ** argv) { } printf("\n"); - printf("%s%s", params.person.c_str(), chat_symb.c_str()); + printf("%s[%s]%s", params.person.c_str(), llama_time_now().c_str(), chat_symb.c_str()); fflush(stdout); // clear audio buffer @@ -636,7 +654,7 @@ int main(int argc, char ** argv) { force_speak = false; text_heard.insert(0, 1, ' '); - text_heard += "\n" + params.bot_name + chat_symb; + text_heard += "\n" + params.bot_name + "[" + llama_time_now().c_str() + "]" + chat_symb; fprintf(stdout, "%s%s%s", "\033[1m", text_heard.c_str(), "\033[0m"); fflush(stdout); diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h index e0073c8b641..2a2e739554f 100644 --- a/ggml/include/ggml-backend.h +++ b/ggml/include/ggml-backend.h @@ -349,6 +349,9 @@ extern "C" { GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size); GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void); + // Temp fix now + GGML_API ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path); + #ifdef __cplusplus } #endif From 1d7a9c44183fe2e5aeeca99697c251cb7f087574 Mon Sep 17 00:00:00 2001 From: peardox Date: Fri, 18 Apr 2025 17:48:29 +0100 Subject: [PATCH 13/14] 
Tidy up and play nice --- examples/bench/bench.cpp | 30 +----------------------------- examples/cli/cli.cpp | 2 +- examples/command/command.cpp | 7 +++++++ examples/server/server.cpp | 8 ++++++++ examples/stream/stream.cpp | 2 +- examples/talk-llama/talk-llama.cpp | 19 ++++--------------- ggml/include/ggml-backend.h | 2 +- include/whisper.h | 5 +++++ src/whisper.cpp | 6 ++++++ 9 files changed, 34 insertions(+), 47 deletions(-) diff --git a/examples/bench/bench.cpp b/examples/bench/bench.cpp index c9603ca0318..3f85950e17e 100644 --- a/examples/bench/bench.cpp +++ b/examples/bench/bench.cpp @@ -11,9 +11,6 @@ struct whisper_params { int32_t what = 0; // what to benchmark: 0 - whisper encoder, 1 - memcpy, 2 - ggml_mul_mat std::string model = "models/ggml-base.en.bin"; - #ifdef GGML_BACKEND_DL - std::string device = ""; - #endif bool use_gpu = true; bool flash_attn = false; @@ -31,9 +28,6 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params } else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); } else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; } - #ifdef GGML_BACKEND_DL - else if (arg == "-d" || arg == "--device") { params.device = argv[++i]; } - #endif else if (arg == "-w" || arg == "--what") { params.what = atoi(argv[++i]); } else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; } else if (arg == "-fa" || arg == "--flash-attn") { params.flash_attn = true; } @@ -55,12 +49,6 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para fprintf(stderr, " -h, --help [default] show this help message and exit\n"); fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads); fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str()); - #ifdef GGML_BACKEND_DL - fprintf(stderr, " -d DEVICE, --device DEVICE [%-7s] device type\n" , params.device.c_str()); - fprintf(stderr, " valid devices : blas, cann, cpu, cuda, hip, kompute,\n"); - fprintf(stderr, " musa, opencl, rpc, sycl and vulkan\n"); - fprintf(stderr, " Optional libraries must be supplied\n"); - #endif fprintf(stderr, " -w N, --what N [%-7d] what to benchmark:\n", params.what); fprintf(stderr, " %-7s 0 - whisper\n", ""); fprintf(stderr, " %-7s 1 - memcpy\n", ""); @@ -77,28 +65,12 @@ static int whisper_bench_full(const whisper_params & params) { // the model is initialised in whisper_init_from_file_with_params // Failure to do this will result in attempts to query null devices #ifdef GGML_BACKEND_DL - // If params.device is "" then load all devices otherwise just load named - // device (and hope they got it right). 
Really should check against valid - // device names - if (params.device.empty()) { - ggml_backend_load_all(); - } else { - if(ggml_backend_load_best(params.device.c_str(), true, nullptr) == nullptr) { - fprintf(stderr, "error: could not load device %s\n", params.device.c_str()); - return 5; - } - ggml_backend_load_best("cpu", true, nullptr); - } + whisper_backend_load_all(); #endif struct whisper_context_params cparams = whisper_context_default_params(); - #ifdef GGML_BACKEND_DL - // Always allow GPU if GGML_BACKEND_DL as it can be overriden or only choice - cparams.use_gpu = true; - #else cparams.use_gpu = params.use_gpu; - #endif cparams.flash_attn = params.flash_attn; struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams); diff --git a/examples/cli/cli.cpp b/examples/cli/cli.cpp index 1e000643ddd..100407d7a86 100644 --- a/examples/cli/cli.cpp +++ b/examples/cli/cli.cpp @@ -1006,7 +1006,7 @@ int main(int argc, char ** argv) { // the model is initialised in whisper_init_from_file_with_params // Failure to do this will result in attempts to query null devices #ifdef GGML_BACKEND_DL - ggml_backend_load_all(); + whisper_backend_load_all(); #endif // whisper init diff --git a/examples/command/command.cpp b/examples/command/command.cpp index 9dc8f629995..4fd27d8d523 100644 --- a/examples/command/command.cpp +++ b/examples/command/command.cpp @@ -690,6 +690,13 @@ int main(int argc, char ** argv) { exit(0); } + // If we're using a GGML_BACKEND_DL build we need to load backends before + // the model is initialised in whisper_init_from_file_with_params + // Failure to do this will result in attempts to query null devices + #ifdef GGML_BACKEND_DL + whisper_backend_load_all(); + #endif + // whisper init struct whisper_context_params cparams = whisper_context_default_params(); diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 38da61673df..b21dd81cab3 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -543,6 +543,14 @@ int main(int argc, char ** argv) { if (sparams.ffmpeg_converter) { check_ffmpeg_availibility(); } + + // If we're using a GGML_BACKEND_DL build we need to load backends before + // the model is initialised in whisper_init_from_file_with_params + // Failure to do this will result in attempts to query null devices + #ifdef GGML_BACKEND_DL + whisper_backend_load_all(); + #endif + // whisper init struct whisper_context_params cparams = whisper_context_default_params(); diff --git a/examples/stream/stream.cpp b/examples/stream/stream.cpp index ce192b472d9..d28a320423b 100644 --- a/examples/stream/stream.cpp +++ b/examples/stream/stream.cpp @@ -159,7 +159,7 @@ int main(int argc, char ** argv) { // the model is initialised in whisper_init_from_file_with_params // Failure to do this will result in attempts to query null devices #ifdef GGML_BACKEND_DL - ggml_backend_load_all(); + whisper_backend_load_all(); #endif struct whisper_context_params cparams = whisper_context_default_params(); diff --git a/examples/talk-llama/talk-llama.cpp b/examples/talk-llama/talk-llama.cpp index 44da2d5ee9c..eedb10a9c6d 100644 --- a/examples/talk-llama/talk-llama.cpp +++ b/examples/talk-llama/talk-llama.cpp @@ -7,13 +7,11 @@ #include "whisper.h" #include "llama.h" -#include -#include -#include #include #include #include #include +#include #include #include #include @@ -37,15 +35,6 @@ static std::vector llama_tokenize(struct llama_context * ctx, const return result; } -static std::string llama_time_now(void) { - auto t = 
std::time(nullptr); - auto tm = *std::localtime(&t); - - std::ostringstream oss; - oss << std::put_time(&tm, "%H:%M:%S"); - return oss.str(); -} - static std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) { const llama_model * model = llama_get_model(ctx); const llama_vocab * vocab = llama_model_get_vocab(model); @@ -300,7 +289,7 @@ int main(int argc, char ** argv) { // the model is initialised in whisper_init_from_file_with_params // Failure to do this will result in attempts to query null devices #ifdef GGML_BACKEND_DL - ggml_backend_load_all(); + whisper_backend_load_all(); #endif // whisper init @@ -542,7 +531,7 @@ int main(int argc, char ** argv) { } printf("\n"); - printf("%s[%s]%s", params.person.c_str(), llama_time_now().c_str(), chat_symb.c_str()); + printf("%s%s", params.person.c_str(), chat_symb.c_str()); fflush(stdout); // clear audio buffer @@ -654,7 +643,7 @@ int main(int argc, char ** argv) { force_speak = false; text_heard.insert(0, 1, ' '); - text_heard += "\n" + params.bot_name + "[" + llama_time_now().c_str() + "]" + chat_symb; + text_heard += "\n" + params.bot_name + chat_symb; fprintf(stdout, "%s%s%s", "\033[1m", text_heard.c_str(), "\033[0m"); fflush(stdout); diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h index 2a2e739554f..0a035a90a84 100644 --- a/ggml/include/ggml-backend.h +++ b/ggml/include/ggml-backend.h @@ -349,7 +349,7 @@ extern "C" { GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size); GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void); - // Temp fix now + // Expose ggml_backend_load_best for external use GGML_API ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path); #ifdef __cplusplus diff --git a/include/whisper.h b/include/whisper.h index 1e1375033ad..f250e14d1a4 100644 --- a/include/whisper.h +++ b/include/whisper.h @@ -668,6 +668,11 @@ extern "C" { // Get the no_speech probability for the specified segment WHISPER_API float whisper_full_get_segment_no_speech_prob (struct whisper_context * ctx, int i_segment); WHISPER_API float whisper_full_get_segment_no_speech_prob_from_state(struct whisper_state * state, int i_segment); + + #ifdef GGML_BACKEND_DL + WHISPER_API void whisper_backend_load_all(void); + #endif + #ifdef __cplusplus } #endif diff --git a/src/whisper.cpp b/src/whisper.cpp index cf09084a5c6..6ed5bdbc8e2 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -7545,3 +7545,9 @@ static void whisper_log_callback_default(ggml_log_level level, const char * text fputs(text, stderr); fflush(stderr); } + +#ifdef GGML_BACKEND_DL +static void whisper_backend_load_all(void) { + ggml_backend_load_all(); +} +#endif From 18edf34ebd96c2626b4eb183829603b02e7a90aa Mon Sep 17 00:00:00 2001 From: peardox Date: Fri, 18 Apr 2025 19:22:49 +0100 Subject: [PATCH 14/14] Tidy up and play nice --- ggml/src/ggml-backend-reg.cpp | 2 +- src/whisper.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp index 405d8e31514..d1ac9899a38 100644 --- a/ggml/src/ggml-backend-reg.cpp +++ b/ggml/src/ggml-backend-reg.cpp @@ -485,7 +485,7 @@ static fs::path backend_filename_extension() { #endif } -static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) { +ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) { // enumerate all the files 
that match [lib]ggml-name-*.[so|dll] in the search paths const fs::path name_path = fs::u8path(name); const fs::path file_prefix = backend_filename_prefix().native() + name_path.native() + fs::u8path("-").native(); diff --git a/src/whisper.cpp b/src/whisper.cpp index 6ed5bdbc8e2..e1b7016052b 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -7547,7 +7547,7 @@ static void whisper_log_callback_default(ggml_log_level level, const char * text } #ifdef GGML_BACKEND_DL -static void whisper_backend_load_all(void) { +void whisper_backend_load_all(void) { ggml_backend_load_all(); } #endif
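Taken together, the series makes a GGML_BACKEND_DL application responsible for loading backends itself before the first whisper context is created, either all at once via the new whisper_backend_load_all() wrapper or selectively via the now-exported ggml_backend_load_best(). A minimal usage sketch follows, assuming the API as it stands after PATCH 14/14; the model path and the "cuda" backend name are illustrative only, and the selective branch is shown commented out as an alternative:

    // sketch: backend loading in a GGML_BACKEND_DL build, before whisper init
    #include "whisper.h"
    #include "ggml-backend.h"
    #include <cstdio>

    int main() {
    #ifdef GGML_BACKEND_DL
        // Option A: load every backend found on the search path.
        whisper_backend_load_all();

        // Option B (alternative): load one specific backend plus the CPU fallback,
        // e.g. CUDA only. "cuda" is an example name, not a guaranteed target.
        // if (ggml_backend_load_best("cuda", /*silent=*/true, /*user_search_path=*/nullptr) == nullptr) {
        //     fprintf(stderr, "error: could not load the cuda backend\n");
        //     return 1;
        // }
        // ggml_backend_load_best("cpu", true, nullptr);
    #endif

        // Backends must be loaded before this call, otherwise device queries hit null devices.
        struct whisper_context_params cparams = whisper_context_default_params();
        struct whisper_context * ctx = whisper_init_from_file_with_params("models/ggml-base.en.bin", cparams);
        if (ctx == nullptr) {
            fprintf(stderr, "failed to initialize whisper context\n");
            return 1;
        }

        // ... run transcription as usual ...

        whisper_free(ctx);
        return 0;
    }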