@@ -468,6 +468,9 @@ struct llama_server_context
     bool add_bos_token = true;
     bool has_eos_token = true;

+    bool grammar_lazy = false;
+    std::vector<common_grammar_trigger> grammar_trigger_words;
+
     int32_t n_ctx; // total context for all clients / slots

     // system prompt
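For context, `common_grammar_trigger` comes from upstream llama.cpp's common code. The sketch below is a hypothetical reconstruction inferred only from how the fields are used later in this diff (`trigger.word` / `trigger.at_start`); the real definition may carry more fields and differs between versions.

```cpp
// Hypothetical sketch, inferred from the usage in this diff; not the verbatim upstream header.
#include <string>

struct common_grammar_trigger {
    std::string word;     // literal text that activates the lazy grammar once the model emits it
    bool        at_start; // restrict the match to the very beginning of the generation
};
```

With `grammar_lazy = true`, the grammar is not enforced from the first token; it only kicks in once one of the configured trigger words appears in the output.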
@@ -706,6 +709,8 @@ struct llama_server_context
         slot->sparams.grammar       = json_value(data, "grammar",       default_sparams.grammar);
         slot->sparams.n_probs       = json_value(data, "n_probs",       default_sparams.n_probs);
         slot->sparams.min_keep      = json_value(data, "min_keep",      default_sparams.min_keep);
+        slot->sparams.grammar_trigger_words = grammar_trigger_words;
+        slot->sparams.grammar_lazy          = grammar_lazy;

         if (slot->n_predict > 0 && slot->params.n_predict > slot->n_predict) {
             // Might be better to reject the request with a 400 ?
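Note that the two new lines do not go through `json_value()`: the trigger settings are copied from the server-level configuration rather than read from the per-request JSON. For reference, the `json_value()` helper used on the surrounding lines behaves roughly like the simplified sketch below (the real helper in llama.cpp's server utils also adds type-error handling; this is an approximation, not the exact implementation).

```cpp
// Simplified sketch of the json_value() helper: read a key from the request JSON,
// fall back to the server default when the key is missing or null.
#include <nlohmann/json.hpp>
using json = nlohmann::json;

template <typename T>
static T json_value(const json & body, const std::string & key, const T & default_value) {
    if (body.contains(key) && !body.at(key).is_null()) {
        return body.at(key).get<T>();
    }
    return default_value;
}
```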
@@ -2374,6 +2379,21 @@ static void params_parse(const backend::ModelOptions* request,
     if ( request->ropefreqscale() != 0.0f ) {
         params.rope_freq_scale = request->ropefreqscale();
     }
+
+    if (request->grammartriggers_size() > 0) {
+        LOG_INFO("configuring grammar triggers", {});
+        llama.grammar_lazy = true;
+        for (int i = 0; i < request->grammartriggers_size(); i++) {
+            common_grammar_trigger trigger;
+            trigger.word = request->grammartriggers(i).word();
+            trigger.at_start = request->grammartriggers(i).at_start();
+            llama.grammar_trigger_words.push_back(trigger);
+            LOG_INFO("grammar trigger", {
+                { "word",     trigger.word },
+                { "at_start", trigger.at_start }
+            });
+        }
+    }
 }

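`params_parse()` now walks the repeated `grammartriggers` field of the incoming `backend::ModelOptions` message. A caller-side sketch of populating that field is shown below; the `add_grammartriggers()` / `set_word()` / `set_at_start()` setters are inferred from the generated-protobuf getters used above (`grammartriggers_size()`, `grammartriggers(i).word()`, ...), so treat them as an assumption about the `.proto` definition rather than a copy of it.

```cpp
// Hypothetical caller-side sketch: attach a lazy-grammar trigger to a ModelOptions request.
backend::ModelOptions opts;

auto * trigger = opts.add_grammartriggers();
trigger->set_word("<tool_call>");   // grammar becomes active once the model emits this text
trigger->set_at_start(false);       // allow the trigger anywhere, not only at the start of the output
```

Once any trigger is present, the backend flips `grammar_lazy` to `true` for every slot, so the grammar constrains sampling only after a trigger word has been generated.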
@@ -2522,6 +2542,18 @@ class BackendServiceImpl final : public backend::Backend::Service {
         return grpc::Status::OK;
     }

+    grpc::Status TokenizeString(ServerContext* context, const backend::PredictOptions* request, backend::TokenizationResponse* response) {
+        json data = parse_options(false, request, llama);
+
+        std::vector<llama_token> tokens = llama.tokenize(data["prompt"], false);
+
+        for (int i = 0; i < tokens.size(); i++) {
+            response->add_tokens(tokens[i]);
+        }
+
+        return grpc::Status::OK;
+    }
+
     grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) {
         llama_client_slot* active_slot = llama.get_active_slot();

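The new `TokenizeString` RPC tokenizes the prompt (without adding a BOS token, per the `false` argument) and streams the token ids back in `TokenizationResponse`. A minimal client sketch is shown below, assuming standard gRPC/protobuf C++ codegen for the `backend` service; the `Prompt` field name on `PredictOptions` and the target address are assumptions for illustration only.

```cpp
// Hypothetical gRPC client sketch for the new TokenizeString RPC.
#include <cstdio>
#include <memory>
#include <grpcpp/grpcpp.h>
#include "backend.grpc.pb.h"

int main() {
    auto channel = grpc::CreateChannel("localhost:50051", grpc::InsecureChannelCredentials());
    std::unique_ptr<backend::Backend::Stub> stub = backend::Backend::NewStub(channel);

    backend::PredictOptions request;
    request.set_prompt("Hello, world!");   // assumed field name; check backend.proto for the real one

    backend::TokenizationResponse response;
    grpc::ClientContext ctx;
    grpc::Status status = stub->TokenizeString(&ctx, request, &response);

    if (status.ok()) {
        for (int i = 0; i < response.tokens_size(); i++) {
            std::printf("token[%d] = %d\n", i, response.tokens(i));
        }
    }
    return 0;
}
```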