25 changes: 16 additions & 9 deletions examples/bench/bench.cpp
@@ -46,21 +46,28 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
fprintf(stderr, "usage: %s [options]\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, "options:\n");
fprintf(stderr, " -h, --help [default] show this help message and exit\n");
fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
fprintf(stderr, " -w N, --what N [%-7d] what to benchmark:\n", params.what);
fprintf(stderr, " %-7s 0 - whisper\n", "");
fprintf(stderr, " %-7s 1 - memcpy\n", "");
fprintf(stderr, " %-7s 2 - ggml_mul_mat\n", "");
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
fprintf(stderr, " -fa, --flash-attn [%-7s] enable flash attention\n", params.flash_attn ? "true" : "false");
fprintf(stderr, " -h, --help [default] show this help message and exit\n");
fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
fprintf(stderr, " -w N, --what N [%-7d] what to benchmark:\n", params.what);
fprintf(stderr, " %-7s 0 - whisper\n", "");
fprintf(stderr, " %-7s 1 - memcpy\n", "");
fprintf(stderr, " %-7s 2 - ggml_mul_mat\n", "");
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
fprintf(stderr, " -fa, --flash-attn [%-7s] enable flash attention\n", params.flash_attn ? "true" : "false");
fprintf(stderr, "\n");
}

static int whisper_bench_full(const whisper_params & params) {
// whisper init

+    // If we're using a GGML_BACKEND_DL build, we need to load the backends
+    // before the model is initialised in whisper_init_from_file_with_params.
+    // Failing to do so results in attempts to query null devices.
+#ifdef GGML_BACKEND_DL
+    whisper_backend_load_all();
+#endif

struct whisper_context_params cparams = whisper_context_default_params();

cparams.use_gpu = params.use_gpu;
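The same guard recurs in every example touched below. For context, a minimal sketch of the call order an application would follow (the model filename is a placeholder):

```cpp
#include "whisper.h"

int main() {
#ifdef GGML_BACKEND_DL
    // dynamic builds must register the backends before any device queries
    whisper_backend_load_all();
#endif

    struct whisper_context_params cparams = whisper_context_default_params();
    cparams.use_gpu = true;

    // "ggml-base.en.bin" is a placeholder model path
    struct whisper_context * ctx = whisper_init_from_file_with_params("ggml-base.en.bin", cparams);
    if (ctx == nullptr) {
        return 1;
    }

    whisper_free(ctx);
    return 0;
}
```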
7 changes: 7 additions & 0 deletions examples/cli/cli.cpp
@@ -1002,6 +1002,13 @@ int main(int argc, char ** argv) {
whisper_log_set(cb_log_disable, NULL);
}

+    // If we're using a GGML_BACKEND_DL build, we need to load the backends
+    // before the model is initialised in whisper_init_from_file_with_params.
+    // Failing to do so results in attempts to query null devices.
+#ifdef GGML_BACKEND_DL
+    whisper_backend_load_all();
+#endif

// whisper init

struct whisper_context_params cparams = whisper_context_default_params();
7 changes: 7 additions & 0 deletions examples/command/command.cpp
@@ -690,6 +690,13 @@ int main(int argc, char ** argv) {
exit(0);
}

+    // If we're using a GGML_BACKEND_DL build, we need to load the backends
+    // before the model is initialised in whisper_init_from_file_with_params.
+    // Failing to do so results in attempts to query null devices.
+#ifdef GGML_BACKEND_DL
+    whisper_backend_load_all();
+#endif

// whisper init

struct whisper_context_params cparams = whisper_context_default_params();
8 changes: 8 additions & 0 deletions examples/server/server.cpp
@@ -543,6 +543,14 @@ int main(int argc, char ** argv) {
if (sparams.ffmpeg_converter) {
check_ffmpeg_availibility();
}

+    // If we're using a GGML_BACKEND_DL build, we need to load the backends
+    // before the model is initialised in whisper_init_from_file_with_params.
+    // Failing to do so results in attempts to query null devices.
+#ifdef GGML_BACKEND_DL
+    whisper_backend_load_all();
+#endif

// whisper init
struct whisper_context_params cparams = whisper_context_default_params();

7 changes: 7 additions & 0 deletions examples/stream/stream.cpp
@@ -155,6 +155,13 @@ int main(int argc, char ** argv) {
exit(0);
}

+    // If we're using a GGML_BACKEND_DL build, we need to load the backends
+    // before the model is initialised in whisper_init_from_file_with_params.
+    // Failing to do so results in attempts to query null devices.
+#ifdef GGML_BACKEND_DL
+    whisper_backend_load_all();
+#endif

struct whisper_context_params cparams = whisper_context_default_params();

cparams.use_gpu = params.use_gpu;
7 changes: 7 additions & 0 deletions examples/talk-llama/talk-llama.cpp
@@ -285,6 +285,13 @@ int main(int argc, char ** argv) {
exit(0);
}

+    // If we're using a GGML_BACKEND_DL build, we need to load the backends
+    // before the model is initialised in whisper_init_from_file_with_params.
+    // Failing to do so results in attempts to query null devices.
+#ifdef GGML_BACKEND_DL
+    whisper_backend_load_all();
+#endif

// whisper init

struct whisper_context_params cparams = whisper_context_default_params();
3 changes: 3 additions & 0 deletions ggml/include/ggml-backend.h
@@ -348,6 +348,9 @@ extern "C" {
// CPU buffer types are always available
GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);

+    // Expose ggml_backend_load_best for external use
+    GGML_API ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path);

#ifdef __cplusplus
}
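With ggml_backend_load_best exposed, a caller can load one named backend rather than all of them. A minimal sketch, relying only on the signature above and on ggml_backend_reg_name; "cpu" is assumed to match a [lib]ggml-cpu-*.[so|dll] file on the default search path (nullptr):

```cpp
#include "ggml-backend.h"

#include <cstdio>

int main() {
    // pick the best matching variant of the named backend from the default
    // search paths (nullptr); silent=false prints diagnostics on failure
    ggml_backend_reg_t reg = ggml_backend_load_best("cpu", /*silent =*/ false, nullptr);
    if (reg == nullptr) {
        fprintf(stderr, "no matching backend found\n");
        return 1;
    }
    printf("loaded backend: %s\n", ggml_backend_reg_name(reg));
    return 0;
}
```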
2 changes: 1 addition & 1 deletion ggml/src/ggml-backend-reg.cpp
@@ -485,7 +485,7 @@ static fs::path backend_filename_extension() {
#endif
}

-static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
+ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
// enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
const fs::path name_path = fs::u8path(name);
const fs::path file_prefix = backend_filename_prefix().native() + name_path.native() + fs::u8path("-").native();
5 changes: 5 additions & 0 deletions include/whisper.h
@@ -668,6 +668,11 @@ extern "C" {
// Get the no_speech probability for the specified segment
WHISPER_API float whisper_full_get_segment_no_speech_prob (struct whisper_context * ctx, int i_segment);
WHISPER_API float whisper_full_get_segment_no_speech_prob_from_state(struct whisper_state * state, int i_segment);

+#ifdef GGML_BACKEND_DL
+    WHISPER_API void whisper_backend_load_all(void);
+#endif

#ifdef __cplusplus
}
#endif
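Since the declaration itself is guarded by GGML_BACKEND_DL, application code that must build against both static and dynamic ggml needs the same guard around the call; a minimal sketch (the #else comment reflects an assumption that static builds register their backends at link time):

```cpp
#ifdef GGML_BACKEND_DL
    whisper_backend_load_all(); // dynamic build: load the backend shared libraries
#else
    // static build: backends are compiled in, so there is nothing to load here
#endif
```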
31 changes: 16 additions & 15 deletions src/whisper.cpp
@@ -208,15 +208,6 @@ static bool ggml_graph_compute_helper(
return t;
}

-static void whisper_load_backends() {
-#ifdef GGML_BACKEND_DL
-    static std::once_flag flag;
-    std::call_once(flag, []() {
-        ggml_backend_load_all();
-    });
-#endif
-}

// TODO: move these functions to ggml-base with support for ggml-backend?

static ggml_tensor * whisper_set_f32(struct ggml_tensor * t, float v) {
@@ -1313,8 +1304,6 @@ static size_t aheads_masks_nbytes(struct whisper_aheads_masks & aheads_masks) {
static ggml_backend_t whisper_backend_init_gpu(const whisper_context_params & params) {
ggml_log_set(g_state.log_callback, g_state.log_callback_user_data);

-    whisper_load_backends();

ggml_backend_dev_t dev = nullptr;

int cnt = 0;
@@ -1372,6 +1361,10 @@ static std::vector<ggml_backend_t> whisper_backend_init(const whisper_context_params & params) {

ggml_backend_t backend_cpu = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr);
if (backend_cpu == nullptr) {
+#ifdef GGML_BACKEND_DL
+        // in a dynamic-backend build the CPU backend may not have been loaded yet, so it can be null here
+        return result;
+#endif
throw std::runtime_error("failed to initialize CPU backend");
}
result.push_back(backend_cpu);
@@ -1407,6 +1400,12 @@ static buft_list_t make_buft_list(whisper_context_params & params) {

// CPU Extra
auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
+#ifdef GGML_BACKEND_DL
+    // in a dynamic-backend build the CPU device may not have been loaded yet, so it can be null here
+    if (cpu_dev == nullptr) {
+        return buft_list;
+    }
+#endif
auto * cpu_reg = ggml_backend_dev_backend_reg(cpu_dev);
auto get_extra_bufts_fn = (ggml_backend_dev_get_extra_bufts_t)
ggml_backend_reg_get_proc_address(cpu_reg, "ggml_backend_dev_get_extra_bufts");
@@ -4321,8 +4320,6 @@ static int whisper_has_openvino(void) {
const char * whisper_print_system_info(void) {
static std::string s;

-    whisper_load_backends();

s = "";
s += "WHISPER : ";
s += "COREML = " + std::to_string(whisper_has_coreml()) + " | ";
@@ -6776,8 +6773,6 @@ WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads) {
}

WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) {
-    whisper_load_backends();

static std::string s;
s = "";
char strbuf[256];
@@ -7550,3 +7545,9 @@ static void whisper_log_callback_default(ggml_log_level level, const char * text, void * user_data) {
fputs(text, stderr);
fflush(stderr);
}

+#ifdef GGML_BACKEND_DL
+void whisper_backend_load_all(void) {
+    ggml_backend_load_all();
+}
+#endif
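One behavioural difference worth noting: the removed internal helper wrapped the load in std::call_once, while the new public wrapper forwards straight to ggml_backend_load_all. An application that can reach the call from several paths may want to restore the once-only behaviour itself; a minimal sketch mirroring the removed helper:

```cpp
#include <mutex>

#include "whisper.h"

static void load_backends_once() {
#ifdef GGML_BACKEND_DL
    // ensure the backend shared libraries are loaded exactly once,
    // no matter how many code paths call this helper
    static std::once_flag flag;
    std::call_once(flag, []() {
        whisper_backend_load_all();
    });
#endif
}
```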