Skip to content

Commit c83ad6d

Browse files
ggml-backend : add device and backend reg interfaces (#9707)
Co-authored-by: Johannes Gäßler <[email protected]>
1 parent a39ab21 commit c83ad6d

28 files changed, with 1,769 additions and 1,263 deletions.

.github/workflows/bench.yml.disabled

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,10 +27,10 @@ on:
2727
push:
2828
branches:
2929
- master
30-
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
30+
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
3131
pull_request_target:
3232
types: [opened, synchronize, reopened]
33-
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
33+
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
3434
schedule:
3535
- cron: '04 2 * * *'
3636

Makefile

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1054,10 +1054,11 @@ ggml/src/ggml-alloc.o: \
10541054
$(CC) $(CFLAGS) -c $< -o $@
10551055

10561056
ggml/src/ggml-backend.o: \
1057-
ggml/src/ggml-backend.c \
1057+
ggml/src/ggml-backend.cpp \
1058+
ggml/src/ggml-backend-impl.h \
10581059
ggml/include/ggml.h \
10591060
ggml/include/ggml-backend.h
1060-
$(CC) $(CFLAGS) -c $< -o $@
1061+
$(CXX) $(CXXFLAGS) -c $< -o $@
10611062

10621063
ggml/src/ggml-quants.o: \
10631064
ggml/src/ggml-quants.c \

Package.swift

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ var sources = [
1111
"src/unicode-data.cpp",
1212
"ggml/src/ggml.c",
1313
"ggml/src/ggml-alloc.c",
14-
"ggml/src/ggml-backend.c",
14+
"ggml/src/ggml-backend.cpp",
1515
"ggml/src/ggml-quants.c",
1616
"ggml/src/ggml-aarch64.c",
1717
]

ggml/include/ggml-backend.h

Lines changed: 144 additions & 59 deletions
Large diffs are not rendered by default.

ggml/include/ggml-blas.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,13 +9,13 @@ extern "C" {
99
#endif
1010

1111
// backend API
12-
GGML_API GGML_CALL ggml_backend_t ggml_backend_blas_init(void);
12+
GGML_API ggml_backend_t ggml_backend_blas_init(void);
1313

14-
GGML_API GGML_CALL bool ggml_backend_is_blas(ggml_backend_t backend);
14+
GGML_API bool ggml_backend_is_blas(ggml_backend_t backend);
1515

1616
// number of threads used for conversion to float
1717
// for openblas and blis, this will also set the number of threads used for blas operations
18-
GGML_API GGML_CALL void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
18+
GGML_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
1919

2020

2121
#ifdef __cplusplus

ggml/include/ggml-cann.h

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ extern "C" {
4444
* @param device The index of the device to initialize.
4545
* @return A pointer to the initialized backend instance, or nullptr on failure.
4646
*/
47-
GGML_API GGML_CALL ggml_backend_t ggml_backend_cann_init(int32_t device);
47+
GGML_API ggml_backend_t ggml_backend_cann_init(int32_t device);
4848

4949
/**
5050
* @brief Checks if a given backend is a CANN backend.
@@ -55,7 +55,7 @@ GGML_API GGML_CALL ggml_backend_t ggml_backend_cann_init(int32_t device);
5555
* @param backend The backend instance to check.
5656
* @return True if the backend is a CANN backend, false otherwise.
5757
*/
58-
GGML_API GGML_CALL bool ggml_backend_is_cann(ggml_backend_t backend);
58+
GGML_API bool ggml_backend_is_cann(ggml_backend_t backend);
5959

6060
/**
6161
* @brief Retrieves the CANN buffer type for a specified device.
@@ -67,7 +67,7 @@ GGML_API GGML_CALL bool ggml_backend_is_cann(ggml_backend_t backend);
6767
* @return A pointer to the buffer type interface for the specified device, or
6868
* nullptr if the device index is out of range.
6969
*/
70-
GGML_API GGML_CALL ggml_backend_buffer_type_t
70+
GGML_API ggml_backend_buffer_type_t
7171
ggml_backend_cann_buffer_type(int32_t device);
7272

7373
/**
@@ -78,14 +78,14 @@ ggml_backend_cann_buffer_type(int32_t device);
7878
*
7979
* @return The number of CANN devices available.
8080
*/
81-
GGML_API GGML_CALL int32_t ggml_backend_cann_get_device_count(void);
81+
GGML_API int32_t ggml_backend_cann_get_device_count(void);
8282

8383
/**
8484
* @brief pinned host buffer for use with the CPU backend for faster copies between CPU and NPU.
8585
*
8686
* @return A pointer to the host buffer type interface.
8787
*/
88-
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);
88+
GGML_API ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);
8989

9090
/**
9191
* @brief Retrieves the description of a specific CANN device.
@@ -97,7 +97,7 @@ GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type
9797
* @param description Pointer to a buffer where the description will be written.
9898
* @param description_size Size of the description buffer.
9999
*/
100-
GGML_API GGML_CALL void ggml_backend_cann_get_device_description(
100+
GGML_API void ggml_backend_cann_get_device_description(
101101
int32_t device, char* description, size_t description_size);
102102

103103
/**
@@ -112,9 +112,9 @@ GGML_API GGML_CALL void ggml_backend_cann_get_device_description(
112112
* @param total Pointer to a variable where the total memory size will be
113113
* stored.
114114
*/
115-
GGML_API GGML_CALL void ggml_backend_cann_get_device_memory(int32_t device,
116-
size_t* free,
117-
size_t* total);
115+
GGML_API void ggml_backend_cann_get_device_memory(int32_t device,
116+
size_t* free,
117+
size_t* total);
118118

119119
/**
120120
* @brief Set the logging callback for GGML.

ggml/include/ggml-cuda.h

Lines changed: 17 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,10 @@
33
#include "ggml.h"
44
#include "ggml-backend.h"
55

6+
#ifdef __cplusplus
7+
extern "C" {
8+
#endif
9+
610
#ifdef GGML_USE_HIPBLAS
711
#define GGML_CUDA_NAME "ROCm"
812
#define GGML_CUBLAS_NAME "hipBLAS"
@@ -13,35 +17,33 @@
1317
#define GGML_CUDA_NAME "CUDA"
1418
#define GGML_CUBLAS_NAME "cuBLAS"
1519
#endif
16-
17-
#ifdef __cplusplus
18-
extern "C" {
19-
#endif
20-
2120
#define GGML_CUDA_MAX_DEVICES 16
2221

2322
// backend API
24-
GGML_API GGML_CALL ggml_backend_t ggml_backend_cuda_init(int device);
23+
GGML_API ggml_backend_t ggml_backend_cuda_init(int device);
2524

26-
GGML_API GGML_CALL bool ggml_backend_is_cuda(ggml_backend_t backend);
25+
GGML_API bool ggml_backend_is_cuda(ggml_backend_t backend);
2726

2827
// device buffer
29-
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
28+
GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
3029

3130
// split tensor buffer that splits matrices by rows across multiple devices
32-
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(const float * tensor_split);
31+
GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(const float * tensor_split);
3332

3433
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
35-
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
34+
GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
3635

37-
GGML_API GGML_CALL int ggml_backend_cuda_get_device_count(void);
38-
GGML_API GGML_CALL void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size);
39-
GGML_API GGML_CALL void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total);
36+
GGML_API int ggml_backend_cuda_get_device_count(void);
37+
GGML_API void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size);
38+
GGML_API void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total);
4039

41-
GGML_API GGML_CALL bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size);
42-
GGML_API GGML_CALL void ggml_backend_cuda_unregister_host_buffer(void * buffer);
40+
GGML_API bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size);
41+
GGML_API void ggml_backend_cuda_unregister_host_buffer(void * buffer);
4342

4443
GGML_API void ggml_backend_cuda_log_set_callback(ggml_log_callback log_callback, void * user_data);
44+
45+
GGML_API ggml_backend_reg_t ggml_backend_cuda_reg(void);
46+
4547
#ifdef __cplusplus
4648
}
4749
#endif

ggml/include/ggml-metal.h

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
// Note: this description is outdated
2+
//
13
// An interface allowing to compute ggml_cgraph with Metal
24
//
35
// This is a fully functional interface that extends ggml with GPU support for Apple devices.
@@ -43,11 +45,11 @@ GGML_API ggml_backend_t ggml_backend_metal_init(void);
4345

4446
GGML_API bool ggml_backend_is_metal(ggml_backend_t backend);
4547

46-
GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size);
48+
GGML_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size);
4749

4850
GGML_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);
4951

50-
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
52+
GGML_API ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
5153

5254
// helper to check if the device supports a specific family
5355
// ideally, the user code should be doing these checks

ggml/include/ggml-rpc.h

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -10,14 +10,14 @@ extern "C" {
1010
#define GGML_RPC_MAX_SERVERS 16
1111

1212
// backend API
13-
GGML_API GGML_CALL ggml_backend_t ggml_backend_rpc_init(const char * endpoint);
14-
GGML_API GGML_CALL bool ggml_backend_is_rpc(ggml_backend_t backend);
13+
GGML_API ggml_backend_t ggml_backend_rpc_init(const char * endpoint);
14+
GGML_API bool ggml_backend_is_rpc(ggml_backend_t backend);
1515

16-
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint);
16+
GGML_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint);
1717

18-
GGML_API GGML_CALL void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t * total);
18+
GGML_API void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t * total);
1919

20-
GGML_API GGML_CALL void start_rpc_server(ggml_backend_t backend, const char * endpoint, size_t free_mem, size_t total_mem);
20+
GGML_API void start_rpc_server(ggml_backend_t backend, const char * endpoint, size_t free_mem, size_t total_mem);
2121

2222
#ifdef __cplusplus
2323
}

ggml/include/ggml-sycl.h

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -23,20 +23,20 @@ GGML_API ggml_backend_t ggml_backend_sycl_init(int device);
2323
GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device);
2424

2525
// split tensor buffer that splits matrices by rows across multiple devices
26-
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
26+
GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
2727

2828
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
2929
GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void);
3030

31-
GGML_API void ggml_backend_sycl_print_sycl_devices(void);
32-
GGML_API GGML_CALL void ggml_sycl_get_gpu_list(int *id_list, int max_len);
33-
GGML_API GGML_CALL void ggml_sycl_get_device_description(int device, char *description, size_t description_size);
34-
GGML_API GGML_CALL int ggml_backend_sycl_get_device_count();
35-
GGML_API GGML_CALL void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
31+
GGML_API void ggml_backend_sycl_print_sycl_devices(void);
32+
GGML_API void ggml_sycl_get_gpu_list(int *id_list, int max_len);
33+
GGML_API void ggml_sycl_get_device_description(int device, char *description, size_t description_size);
34+
GGML_API int ggml_backend_sycl_get_device_count();
35+
GGML_API void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
3636

3737
// SYCL doesn't support registering host memory, keep here for reference
38-
// GGML_API GGML_CALL bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
39-
// GGML_API GGML_CALL void ggml_backend_sycl_unregister_host_buffer(void * buffer);
38+
// GGML_API bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
39+
// GGML_API void ggml_backend_sycl_unregister_host_buffer(void * buffer);
4040
#ifdef __cplusplus
4141
}
4242
#endif

0 commit comments

Comments
 (0)