@@ -33,7 +33,7 @@ type TokenUsage struct {
 	TimingTokenGeneration float64
 }
 
-func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
+func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c *config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
 	modelFile := c.Model
 
 	// Check if the modelFile exists, if it doesn't try to load it from the gallery
@@ -48,7 +48,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
 		}
 	}
 
-	opts := ModelOptions(c, o)
+	opts := ModelOptions(*c, o)
 	inferenceModel, err := loader.Load(opts...)
 	if err != nil {
 		return nil, err
@@ -84,7 +84,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
 
 	// in GRPC, the backend is supposed to answer to 1 single token if stream is not supported
 	fn := func() (LLMResponse, error) {
-		opts := gRPCPredictOpts(c, loader.ModelPath)
+		opts := gRPCPredictOpts(*c, loader.ModelPath)
 		opts.Prompt = s
 		opts.Messages = protoMessages
 		opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate
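
Note: after this change ModelInference takes the backend config by pointer, while helpers such as ModelOptions and gRPCPredictOpts still take it by value, which is why the call sites above dereference with *c. A minimal sketch of an adjusted caller, assuming placeholder names (cfg, appConfig, prompt, onToken) that are not identifiers from this repository:

	// Sketch only: cfg, appConfig, prompt, and onToken are hypothetical caller-side values.
	var cfg config.BackendConfig
	fnGen, err := ModelInference(ctx, prompt, messages, nil, nil, nil, loader, &cfg, appConfig, onToken)
	if err != nil {
		return nil, err
	}
	resp, err := fnGen() // runs the backend call and returns the final LLMResponse

Callers that previously passed the config by value only need to take its address; the pointer avoids copying the config struct on every inference call.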