Skip to content

Commit 62a1e4e

Browse files
committed
feat(backends): Drop bert.cpp
Use Llama 3.2 (served through llama.cpp) as a drop-in replacement for bert.cpp. Signed-off-by: Ettore Di Giacinto <[email protected]>
1 parent e8128a3 commit 62a1e4e

File tree

13 files changed

+36
-180
lines changed

13 files changed

+36
-180
lines changed

Makefile

Lines changed: 1 addition & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,6 @@ CPPLLAMA_VERSION?=47f931c8f9a26c072d71224bc8013cc66ea9e445
1414
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
1515
WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d
1616

17-
# bert.cpp version
18-
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
19-
BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4
20-
2117
# go-piper version
2218
PIPER_REPO?=https://github.com/mudler/go-piper
2319
PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
@@ -198,7 +194,6 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
198194
endif
199195

200196
ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
201-
ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
202197
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
203198
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
204199
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
@@ -228,19 +223,6 @@ endif
228223

229224
all: help
230225

231-
## BERT embeddings
232-
sources/go-bert.cpp:
233-
mkdir -p sources/go-bert.cpp
234-
cd sources/go-bert.cpp && \
235-
git init && \
236-
git remote add origin $(BERT_REPO) && \
237-
git fetch origin && \
238-
git checkout $(BERT_VERSION) && \
239-
git submodule update --init --recursive --depth 1 --single-branch
240-
241-
sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
242-
$(MAKE) -C sources/go-bert.cpp libgobert.a
243-
244226
## go-llama.cpp
245227
sources/go-llama.cpp:
246228
mkdir -p sources/go-llama.cpp
@@ -320,12 +302,11 @@ sources/whisper.cpp:
320302
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
321303
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
322304

323-
get-sources: sources/go-llama.cpp sources/go-piper sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
305+
get-sources: sources/go-llama.cpp sources/go-piper sources/whisper.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
324306

325307
replace:
326308
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
327309
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
328-
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp
329310
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
330311
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
331312
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
@@ -334,7 +315,6 @@ replace:
334315
dropreplace:
335316
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
336317
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
337-
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-bert.cpp
338318
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
339319
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
340320
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
@@ -349,7 +329,6 @@ rebuild: ## Rebuilds the project
349329
$(MAKE) -C sources/go-llama.cpp clean
350330
$(MAKE) -C sources/whisper.cpp clean
351331
$(MAKE) -C sources/go-stable-diffusion clean
352-
$(MAKE) -C sources/go-bert.cpp clean
353332
$(MAKE) -C sources/go-piper clean
354333
$(MAKE) -C sources/go-tiny-dream clean
355334
$(MAKE) build
@@ -707,13 +686,6 @@ backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_bindin
707686
backend-assets/grpc: protogen-go replace
708687
mkdir -p backend-assets/grpc
709688

710-
backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
711-
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
712-
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
713-
ifneq ($(UPX),)
714-
$(UPX) backend-assets/grpc/bert-embeddings
715-
endif
716-
717689
backend-assets/grpc/huggingface: backend-assets/grpc
718690
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
719691
ifneq ($(UPX),)

aio/cpu/embeddings.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name: text-embedding-ada-002
2-
backend: bert-embeddings
2+
embeddings: true
33
parameters:
4-
model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin
4+
model: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/llama-3.2-1b-instruct-q4_k_m.gguf
55

66
usage: |
77
You can test this model with curl like this:

backend/go/llm/bert/bert.go

Lines changed: 0 additions & 34 deletions
This file was deleted.

backend/go/llm/bert/main.go

Lines changed: 0 additions & 21 deletions
This file was deleted.

core/gallery/models_test.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ import (
1212
"gopkg.in/yaml.v3"
1313
)
1414

15+
const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml`
16+
1517
var _ = Describe("Model test", func() {
1618

1719
Context("Downloading", func() {
@@ -47,7 +49,7 @@ var _ = Describe("Model test", func() {
4749

4850
gallery := []GalleryModel{{
4951
Name: "bert",
50-
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
52+
URL: bertEmbeddingsURL,
5153
}}
5254
out, err := yaml.Marshal(gallery)
5355
Expect(err).ToNot(HaveOccurred())
@@ -66,7 +68,7 @@ var _ = Describe("Model test", func() {
6668
Expect(err).ToNot(HaveOccurred())
6769
Expect(len(models)).To(Equal(1))
6870
Expect(models[0].Name).To(Equal("bert"))
69-
Expect(models[0].URL).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"))
71+
Expect(models[0].URL).To(Equal(bertEmbeddingsURL))
7072
Expect(models[0].Installed).To(BeFalse())
7173

7274
err = InstallModelFromGallery(galleries, "test@bert", tempdir, GalleryModel{}, func(s1, s2, s3 string, f float64) {}, true)
@@ -78,7 +80,7 @@ var _ = Describe("Model test", func() {
7880
content := map[string]interface{}{}
7981
err = yaml.Unmarshal(dat, &content)
8082
Expect(err).ToNot(HaveOccurred())
81-
Expect(content["backend"]).To(Equal("bert-embeddings"))
83+
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
8284

8385
models, err = AvailableGalleryModels(galleries, tempdir)
8486
Expect(err).ToNot(HaveOccurred())

core/http/app_test.go

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,8 @@ func postInvalidRequest(url string) (error, int) {
240240
return nil, resp.StatusCode
241241
}
242242

243+
const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml`
244+
243245
//go:embed backend-assets/*
244246
var backendAssets embed.FS
245247

@@ -279,13 +281,13 @@ var _ = Describe("API test", func() {
279281
g := []gallery.GalleryModel{
280282
{
281283
Name: "bert",
282-
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
284+
URL: bertEmbeddingsURL,
283285
},
284286
{
285287
Name: "bert2",
286-
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
288+
URL: bertEmbeddingsURL,
287289
Overrides: map[string]interface{}{"foo": "bar"},
288-
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"}},
290+
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: bertEmbeddingsURL}},
289291
},
290292
}
291293
out, err := yaml.Marshal(g)
@@ -383,7 +385,7 @@ var _ = Describe("API test", func() {
383385
content := map[string]interface{}{}
384386
err = yaml.Unmarshal(dat, &content)
385387
Expect(err).ToNot(HaveOccurred())
386-
Expect(content["backend"]).To(Equal("bert-embeddings"))
388+
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
387389
Expect(content["foo"]).To(Equal("bar"))
388390

389391
models, err = getModels("http://127.0.0.1:9090/models/available")
@@ -402,7 +404,7 @@ var _ = Describe("API test", func() {
402404
It("overrides models", func() {
403405

404406
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
405-
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
407+
URL: bertEmbeddingsURL,
406408
Name: "bert",
407409
Overrides: map[string]interface{}{
408410
"backend": "llama",
@@ -451,7 +453,7 @@ var _ = Describe("API test", func() {
451453
})
452454
It("apply models without overrides", func() {
453455
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
454-
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
456+
URL: bertEmbeddingsURL,
455457
Name: "bert",
456458
Overrides: map[string]interface{}{},
457459
})
@@ -471,7 +473,7 @@ var _ = Describe("API test", func() {
471473
content := map[string]interface{}{}
472474
err = yaml.Unmarshal(dat, &content)
473475
Expect(err).ToNot(HaveOccurred())
474-
Expect(content["backend"]).To(Equal("bert-embeddings"))
476+
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
475477
})
476478

477479
It("runs openllama(llama-ggml backend)", Label("llama"), func() {

docs/content/docs/features/embeddings.md

Lines changed: 11 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -27,39 +27,6 @@ embeddings: true
2727
# .. other parameters
2828
```
2929

30-
## Bert embeddings
31-
32-
To use `bert.cpp` models you can use the `bert` embedding backend.
33-
34-
An example model config file:
35-
36-
```yaml
37-
name: text-embedding-ada-002
38-
parameters:
39-
model: bert
40-
backend: bert-embeddings
41-
embeddings: true
42-
# .. other parameters
43-
```
44-
45-
The `bert` backend uses [bert.cpp](https://github.com/skeskinen/bert.cpp) and uses `ggml` models.
46-
47-
For instance you can download the `ggml` quantized version of `all-MiniLM-L6-v2` from https://huggingface.co/skeskinen/ggml:
48-
49-
```bash
50-
wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O models/bert
51-
```
52-
53-
To test locally (LocalAI server running on `localhost`),
54-
you can use `curl` (and `jq` at the end to prettify):
55-
56-
```bash
57-
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
58-
"input": "Your text string goes here",
59-
"model": "text-embedding-ada-002"
60-
}' | jq "."
61-
```
62-
6330
## Huggingface embeddings
6431

6532
To use `sentence-transformers` and models in `huggingface` you can use the `sentencetransformers` embedding backend.
@@ -87,17 +54,26 @@ The `sentencetransformers` backend uses Python [sentence-transformers](https://g
8754

8855
## Llama.cpp embeddings
8956

90-
Embeddings with `llama.cpp` are supported with the `llama` backend.
57+
Embeddings with `llama.cpp` are supported by the `llama-cpp` backend. They must be enabled by setting `embeddings` to `true` in the model configuration.
9158

9259
```yaml
9360
name: my-awesome-model
94-
backend: llama
61+
backend: llama-cpp
9562
embeddings: true
9663
parameters:
9764
model: ggml-file.bin
9865
# ...
9966
```
10067

68+
Then you can use the API to generate embeddings:
69+
70+
```bash
71+
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
72+
"input": "My text",
73+
"model": "my-awesome-model"
74+
}' | jq "."
75+
```
76+
10177
## 💡 Examples
10278

10379
- Example that uses LlamaIndex with LocalAI as the embeddings provider: [here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/).

docs/content/docs/features/model-gallery.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
300300

301301
```bash
302302
curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
303-
"url": "github:mudler/LocalAI/gallery/bert-embeddings.yaml",
303+
"id": "bert-embeddings",
304304
"name": "text-embedding-ada-002"
305305
}'
306306
```

embedded/models/bert-cpp.yaml

Lines changed: 0 additions & 23 deletions
This file was deleted.

gallery/bert-embeddings.yaml

Lines changed: 0 additions & 12 deletions
This file was deleted.

0 commit comments

Comments
 (0)