diff --git a/src/Infrastructure/BotSharp.Abstraction/Conversations/Models/Conversation.cs b/src/Infrastructure/BotSharp.Abstraction/Conversations/Models/Conversation.cs index ac1efb04e..5ffdf1877 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Conversations/Models/Conversation.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Conversations/Models/Conversation.cs @@ -47,15 +47,19 @@ public class DialogElement public string Content { get; set; } = default!; [JsonPropertyName("secondary_content")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public string? SecondaryContent { get; set; } [JsonPropertyName("rich_content")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public string? RichContent { get; set; } [JsonPropertyName("secondary_rich_content")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public string? SecondaryRichContent { get; set; } [JsonPropertyName("payload")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public string? Payload { get; set; } public DialogElement() @@ -95,8 +99,17 @@ public class DialogMetaData public string MessageType { get; set; } = default!; [JsonPropertyName("function_name")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public string? FunctionName { get; set; } + [JsonPropertyName("function_args")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public string? FunctionArgs { get; set; } + + [JsonPropertyName("tool_call_id")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public string? ToolCallId { get; set; } + [JsonPropertyName("sender_id")] public string? SenderId { get; set; } diff --git a/src/Infrastructure/BotSharp.Abstraction/Hooks/HookProvider.cs b/src/Infrastructure/BotSharp.Abstraction/Hooks/HookProvider.cs index cedc7c2a5..6d713952f 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Hooks/HookProvider.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Hooks/HookProvider.cs @@ -1,25 +1,19 @@ using BotSharp.Abstraction.Conversations; using Microsoft.Extensions.DependencyInjection; -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; -namespace BotSharp.Abstraction.Hooks +namespace BotSharp.Abstraction.Hooks; + +public static class HookProvider { - public static class HookProvider + public static List GetHooks(this IServiceProvider services, string agentId) where T : IHookBase { - public static List GetHooks(this IServiceProvider services, string agentId) where T : IHookBase - { - var hooks = services.GetServices().Where(p => p.IsMatch(agentId)); - return hooks.ToList(); - } + var hooks = services.GetServices().Where(p => p.IsMatch(agentId)); + return hooks.ToList(); + } - public static List GetHooksOrderByPriority(this IServiceProvider services, string agentId) where T: IConversationHook - { - var hooks = services.GetServices().Where(p => p.IsMatch(agentId)); - return hooks.OrderBy(p => p.Priority).ToList(); - } + public static List GetHooksOrderByPriority(this IServiceProvider services, string agentId) where T: IConversationHook + { + var hooks = services.GetServices().Where(p => p.IsMatch(agentId)); + return hooks.OrderBy(p => p.Priority).ToList(); } } diff --git a/src/Infrastructure/BotSharp.Abstraction/Hooks/IHookBase.cs b/src/Infrastructure/BotSharp.Abstraction/Hooks/IHookBase.cs index 834577e9a..5e510ea5e 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Hooks/IHookBase.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Hooks/IHookBase.cs @@ -1,17 +1,10 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; +namespace BotSharp.Abstraction.Hooks; -namespace BotSharp.Abstraction.Hooks +public interface IHookBase { - public interface IHookBase - { - /// - /// Agent Id - /// - string SelfId => string.Empty; - bool IsMatch(string agentId) => string.IsNullOrEmpty(SelfId) || SelfId == agentId; - } + /// + /// Agent Id + /// + string SelfId => string.Empty; + bool IsMatch(string agentId) => string.IsNullOrEmpty(SelfId) || SelfId == agentId; } diff --git a/src/Infrastructure/BotSharp.Abstraction/MCP/Services/IMcpService.cs b/src/Infrastructure/BotSharp.Abstraction/MCP/Services/IMcpService.cs index 71952dacf..32564a3f5 100644 --- a/src/Infrastructure/BotSharp.Abstraction/MCP/Services/IMcpService.cs +++ b/src/Infrastructure/BotSharp.Abstraction/MCP/Services/IMcpService.cs @@ -2,5 +2,5 @@ namespace BotSharp.Abstraction.MCP.Services; public interface IMcpService { - IEnumerable GetServerConfigs() => []; + Task> GetServerConfigsAsync() => Task.FromResult>([]); } diff --git a/src/Infrastructure/BotSharp.Abstraction/MLTasks/IRealTimeCompletion.cs b/src/Infrastructure/BotSharp.Abstraction/MLTasks/IRealTimeCompletion.cs index 11174356e..d6057859c 100644 --- a/src/Infrastructure/BotSharp.Abstraction/MLTasks/IRealTimeCompletion.cs +++ b/src/Infrastructure/BotSharp.Abstraction/MLTasks/IRealTimeCompletion.cs @@ -10,14 +10,15 @@ public interface IRealTimeCompletion Task Connect( RealtimeHubConnection conn, - Action onModelReady, - Action onModelAudioDeltaReceived, - Action onModelAudioResponseDone, - Action onAudioTranscriptDone, - Action> onModelResponseDone, - Action onConversationItemCreated, - Action onInputAudioTranscriptionCompleted, - Action onInterruptionDetected); + Func onModelReady, + Func onModelAudioDeltaReceived, + Func onModelAudioResponseDone, + Func onModelAudioTranscriptDone, + Func, Task> onModelResponseDone, + Func onConversationItemCreated, + Func onInputAudioTranscriptionDone, + Func onInterruptionDetected); + Task AppenAudioBuffer(string message); Task AppenAudioBuffer(ArraySegment data, int length); @@ -29,6 +30,4 @@ Task Connect( Task RemoveConversationItem(string itemId); Task TriggerModelInference(string? instructions = null); Task CancelModelResponse(); - Task> OnResponsedDone(RealtimeHubConnection conn, string response); - Task OnConversationItemCreated(RealtimeHubConnection conn, string response); } diff --git a/src/Infrastructure/BotSharp.Abstraction/Realtime/Models/RealtimeModelSettings.cs b/src/Infrastructure/BotSharp.Abstraction/Realtime/Models/RealtimeModelSettings.cs index daf8714a8..14f5923fb 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Realtime/Models/RealtimeModelSettings.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Realtime/Models/RealtimeModelSettings.cs @@ -12,7 +12,12 @@ public class RealtimeModelSettings public string Voice { get; set; } = "alloy"; public float Temperature { get; set; } = 0.8f; public int MaxResponseOutputTokens { get; set; } = 512; - public int ModelResponseTimeout { get; set; } = 30; + public int ModelResponseTimeoutSeconds { get; set; } = 30; + + /// + /// Whether the target event arrives after ModelResponseTimeoutSeconds, e.g., "response.done" + /// + public string? ModelResponseTimeoutEndEvent { get; set; } public AudioTranscription InputAudioTranscription { get; set; } = new(); public ModelTurnDetection TurnDetection { get; set; } = new(); } diff --git a/src/Infrastructure/BotSharp.Core/Routing/Executor/IFunctionExecutor.cs b/src/Infrastructure/BotSharp.Abstraction/Routing/Executor/IFunctionExecutor.cs similarity index 77% rename from src/Infrastructure/BotSharp.Core/Routing/Executor/IFunctionExecutor.cs rename to src/Infrastructure/BotSharp.Abstraction/Routing/Executor/IFunctionExecutor.cs index 4ba2e69f7..e1c604adf 100644 --- a/src/Infrastructure/BotSharp.Core/Routing/Executor/IFunctionExecutor.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Routing/Executor/IFunctionExecutor.cs @@ -1,8 +1,7 @@ -namespace BotSharp.Core.Routing.Executor; +namespace BotSharp.Abstraction.Routing.Executor; public interface IFunctionExecutor { public Task ExecuteAsync(RoleDialogModel message); - public Task GetIndicatorAsync(RoleDialogModel message); } diff --git a/src/Infrastructure/BotSharp.Core.Realtime/Hooks/RealtimeConversationHook.cs b/src/Infrastructure/BotSharp.Core.Realtime/Hooks/RealtimeConversationHook.cs index 5e1fcfee1..4eb4c64e9 100644 --- a/src/Infrastructure/BotSharp.Core.Realtime/Hooks/RealtimeConversationHook.cs +++ b/src/Infrastructure/BotSharp.Core.Realtime/Hooks/RealtimeConversationHook.cs @@ -72,7 +72,8 @@ public async Task OnFunctionExecuted(RoleDialogModel message) { return; } - else if (message.StopCompletion) + + if (message.StopCompletion) { await hub.Completer.TriggerModelInference($"Say to user: \"{message.Content}\""); } diff --git a/src/Infrastructure/BotSharp.Core.Realtime/Services/RealtimeHub.cs b/src/Infrastructure/BotSharp.Core.Realtime/Services/RealtimeHub.cs index 97ac91c27..070bcf267 100644 --- a/src/Infrastructure/BotSharp.Core.Realtime/Services/RealtimeHub.cs +++ b/src/Infrastructure/BotSharp.Core.Realtime/Services/RealtimeHub.cs @@ -77,8 +77,8 @@ await HookEmitter.Emit(_services, async hook => await hook.OnMode { var data = _conn.OnModelAudioResponseDone(); await (responseToUser?.Invoke(data) ?? Task.CompletedTask); - }, - onAudioTranscriptDone: async transcript => + }, + onModelAudioTranscriptDone: async transcript => { }, @@ -98,6 +98,8 @@ await HookEmitter.Emit(_services, async hook => await hook.OnRouti } await routing.InvokeFunction(message.FunctionName, message); + dialogs.Add(message); + storage.Append(_conn.ConversationId, message); } else { @@ -120,7 +122,7 @@ await HookEmitter.Emit(_services, async hook => await hook.OnRouti { }, - onInputAudioTranscriptionCompleted: async message => + onInputAudioTranscriptionDone: async message => { // append input audio transcript to conversation dialogs.Add(message); diff --git a/src/Infrastructure/BotSharp.Core/BotSharp.Core.csproj b/src/Infrastructure/BotSharp.Core/BotSharp.Core.csproj index 76b534a7e..ddcf68cb4 100644 --- a/src/Infrastructure/BotSharp.Core/BotSharp.Core.csproj +++ b/src/Infrastructure/BotSharp.Core/BotSharp.Core.csproj @@ -96,6 +96,8 @@ + + @@ -204,6 +206,11 @@ PreserveNewest + + + + PreserveNewest + diff --git a/src/Infrastructure/BotSharp.Core/Conversations/Services/ConversationStorage.cs b/src/Infrastructure/BotSharp.Core/Conversations/Services/ConversationStorage.cs index 49852eb22..eeceeb091 100644 --- a/src/Infrastructure/BotSharp.Core/Conversations/Services/ConversationStorage.cs +++ b/src/Infrastructure/BotSharp.Core/Conversations/Services/ConversationStorage.cs @@ -40,6 +40,8 @@ public void Append(string conversationId, IEnumerable dialogs) MessageId = dialog.MessageId, MessageType = dialog.MessageType, FunctionName = dialog.FunctionName, + FunctionArgs = dialog.FunctionArgs, + ToolCallId = dialog.ToolCallId, CreatedTime = dialog.CreatedAt }; @@ -109,7 +111,6 @@ public List GetDialogs(string conversationId) var currentAgentId = meta.AgentId; var messageId = meta.MessageId; var messageType = meta.MessageType; - var function = meta.FunctionName; var senderId = role == AgentRole.Function ? currentAgentId : meta.SenderId; var createdAt = meta.CreatedTime; var richContent = !string.IsNullOrEmpty(dialog.RichContent) ? @@ -124,7 +125,9 @@ public List GetDialogs(string conversationId) MessageType = messageType, CreatedAt = createdAt, SenderId = senderId, - FunctionName = function, + FunctionName = meta.FunctionName, + FunctionArgs = meta.FunctionArgs, + ToolCallId = meta.ToolCallId, RichContent = richContent, SecondaryContent = secondaryContent, SecondaryRichContent = secondaryRichContent, diff --git a/src/Infrastructure/BotSharp.Core/Demo/Functions/GetWeatherFn.cs b/src/Infrastructure/BotSharp.Core/Demo/Functions/GetWeatherFn.cs new file mode 100644 index 000000000..a78ad4ec7 --- /dev/null +++ b/src/Infrastructure/BotSharp.Core/Demo/Functions/GetWeatherFn.cs @@ -0,0 +1,23 @@ +using BotSharp.Abstraction.Functions; + +namespace BotSharp.Core.Demo.Functions; + +public class GetWeatherFn : IFunctionCallback +{ + private readonly IServiceProvider _services; + + public GetWeatherFn(IServiceProvider services) + { + _services = services; + } + + public string Name => "get_weather"; + public string Indication => "Querying weather"; + + public async Task Execute(RoleDialogModel message) + { + message.Content = $"It is a sunny day!"; + //message.StopCompletion = true; + return true; + } +} \ No newline at end of file diff --git a/src/Infrastructure/BotSharp.Core/Infrastructures/Websocket/AsyncWebsocketDataResultEnumerator.cs b/src/Infrastructure/BotSharp.Core/Infrastructures/Websocket/AsyncWebsocketDataResultEnumerator.cs index 692e72204..f89127e27 100644 --- a/src/Infrastructure/BotSharp.Core/Infrastructures/Websocket/AsyncWebsocketDataResultEnumerator.cs +++ b/src/Infrastructure/BotSharp.Core/Infrastructures/Websocket/AsyncWebsocketDataResultEnumerator.cs @@ -1,4 +1,3 @@ -using BotSharp.Abstraction.Realtime.Models.Session; using System.Buffers; using System.ClientModel; using System.Net.WebSockets; @@ -44,6 +43,9 @@ public async ValueTask MoveNextAsync() if (receivedResult.CloseStatus.HasValue) { +#if DEBUG + Console.WriteLine($"Websocket close: {receivedResult.CloseStatus} {receivedResult.CloseStatusDescription}"); +#endif Current = null; return false; } diff --git a/src/Infrastructure/BotSharp.Core/MCP/BotSharpMCPExtensions.cs b/src/Infrastructure/BotSharp.Core/MCP/BotSharpMCPExtensions.cs index b3c4e6de6..8eeee7b35 100644 --- a/src/Infrastructure/BotSharp.Core/MCP/BotSharpMCPExtensions.cs +++ b/src/Infrastructure/BotSharp.Core/MCP/BotSharpMCPExtensions.cs @@ -18,15 +18,11 @@ public static IServiceCollection AddBotSharpMCP(this IServiceCollection services { var settings = config.GetSection("MCP").Get(); services.AddScoped(provider => settings); + services.AddScoped(); if (settings != null && settings.Enabled && !settings.McpServerConfigs.IsNullOrEmpty()) { - services.AddScoped(); - - var clientManager = new McpClientManager(settings); - services.AddScoped(provider => clientManager); - - // Register hooks + services.AddScoped(); services.AddScoped(); } return services; diff --git a/src/Infrastructure/BotSharp.Core/MCP/Helpers/AiFunctionHelper.cs b/src/Infrastructure/BotSharp.Core/MCP/Helpers/AiFunctionHelper.cs index d6f1cb5b1..3e15cd64b 100644 --- a/src/Infrastructure/BotSharp.Core/MCP/Helpers/AiFunctionHelper.cs +++ b/src/Infrastructure/BotSharp.Core/MCP/Helpers/AiFunctionHelper.cs @@ -1,19 +1,28 @@ -using System.Text.Json; using ModelContextProtocol.Client; namespace BotSharp.Core.MCP.Helpers; internal static class AiFunctionHelper { - public static FunctionDef MapToFunctionDef(McpClientTool tool) + public static FunctionDef? MapToFunctionDef(McpClientTool tool) { if (tool == null) { - throw new ArgumentNullException(nameof(tool)); + return null; } - var properties = tool.JsonSchema.GetProperty("properties"); - var required = tool.JsonSchema.GetProperty("required"); + var properties = "{}"; + var required = "[]"; + + if (tool.JsonSchema.TryGetProperty("properties", out var p)) + { + properties = p.GetRawText(); + } + + if (tool.JsonSchema.TryGetProperty("required", out var r)) + { + required = r.GetRawText(); + } var funDef = new FunctionDef { @@ -23,8 +32,8 @@ public static FunctionDef MapToFunctionDef(McpClientTool tool) Parameters = new FunctionParametersDef { Type = "object", - Properties = JsonDocument.Parse(properties.GetRawText()), - Required = JsonSerializer.Deserialize>(required.GetRawText()) + Properties = JsonDocument.Parse(properties), + Required = JsonSerializer.Deserialize>(required) ?? [] } }; diff --git a/src/Infrastructure/BotSharp.Core/MCP/Hooks/MCPToolAgentHook.cs b/src/Infrastructure/BotSharp.Core/MCP/Hooks/MCPToolAgentHook.cs index 08c38b6bc..743bf4c06 100644 --- a/src/Infrastructure/BotSharp.Core/MCP/Hooks/MCPToolAgentHook.cs +++ b/src/Infrastructure/BotSharp.Core/MCP/Hooks/MCPToolAgentHook.cs @@ -41,18 +41,26 @@ private async Task> GetMcpContent(Agent agent) return functionDefs; } - var mcpClientManager = _services.GetRequiredService(); - var mcps = agent.McpTools.Where(x => !x.Disabled); + var mcpClientManager = _services.GetService(); + if (mcpClientManager == null) + { + return functionDefs; + } + + var mcps = agent.McpTools?.Where(x => !x.Disabled) ?? []; foreach (var item in mcps) { var mcpClient = await mcpClientManager.GetMcpClientAsync(item.ServerId); - if (mcpClient != null) + if (mcpClient == null) continue; + + var tools = await mcpClient.ListToolsAsync(); + var toolNames = item.Functions.Select(x => x.Name).ToList(); + var targetTools = tools.Where(x => toolNames.Contains(x.Name, StringComparer.OrdinalIgnoreCase)); + foreach (var tool in targetTools) { - var tools = await mcpClient.ListToolsAsync(); - var toolnames = item.Functions.Select(x => x.Name).ToList(); - foreach (var tool in tools.Where(x => toolnames.Contains(x.Name, StringComparer.OrdinalIgnoreCase))) + var funDef = AiFunctionHelper.MapToFunctionDef(tool); + if (funDef != null) { - var funDef = AiFunctionHelper.MapToFunctionDef(tool); functionDefs.Add(funDef); } } diff --git a/src/Infrastructure/BotSharp.Core/MCP/Managers/McpClientManager.cs b/src/Infrastructure/BotSharp.Core/MCP/Managers/McpClientManager.cs index a4a89d895..50b798eb4 100644 --- a/src/Infrastructure/BotSharp.Core/MCP/Managers/McpClientManager.cs +++ b/src/Infrastructure/BotSharp.Core/MCP/Managers/McpClientManager.cs @@ -6,45 +6,63 @@ namespace BotSharp.Core.MCP.Managers; public class McpClientManager : IDisposable { - private readonly McpSettings _mcpSettings; + private readonly IServiceProvider _services; + private readonly ILogger _logger; - public McpClientManager(McpSettings mcpSettings) + public McpClientManager( + IServiceProvider services, + ILogger logger) { - _mcpSettings = mcpSettings; + _services = services; + _logger = logger; } - public async Task GetMcpClientAsync(string serverId) + public async Task GetMcpClientAsync(string serverId) { - var config = _mcpSettings.McpServerConfigs.Where(x => x.Id == serverId).FirstOrDefault(); - - IClientTransport transport; - if (config.SseConfig != null) + try { - transport = new SseClientTransport(new SseClientTransportOptions + var settings = _services.GetRequiredService(); + var config = settings.McpServerConfigs.Where(x => x.Id == serverId).FirstOrDefault(); + if (config == null) { - Name = config.Name, - Endpoint = new Uri(config.SseConfig.EndPoint), - AdditionalHeaders = config.SseConfig.AdditionalHeaders, - ConnectionTimeout = config.SseConfig.ConnectionTimeout - }); - } - else if (config.StdioConfig != null) - { - transport = new StdioClientTransport(new StdioClientTransportOptions + return null; + } + + IClientTransport? transport = null; + if (config.SseConfig != null) + { + transport = new SseClientTransport(new SseClientTransportOptions + { + Name = config.Name, + Endpoint = new Uri(config.SseConfig.EndPoint), + AdditionalHeaders = config.SseConfig.AdditionalHeaders, + ConnectionTimeout = config.SseConfig.ConnectionTimeout + }); + } + else if (config.StdioConfig != null) { - Name = config.Name, - Command = config.StdioConfig.Command, - Arguments = config.StdioConfig.Arguments, - EnvironmentVariables = config.StdioConfig.EnvironmentVariables, - ShutdownTimeout = config.StdioConfig.ShutdownTimeout - }); + transport = new StdioClientTransport(new StdioClientTransportOptions + { + Name = config.Name, + Command = config.StdioConfig.Command, + Arguments = config.StdioConfig.Arguments, + EnvironmentVariables = config.StdioConfig.EnvironmentVariables, + ShutdownTimeout = config.StdioConfig.ShutdownTimeout + }); + } + + if (transport == null) + { + return null; + } + + return await McpClientFactory.CreateAsync(transport, settings.McpClientOptions); } - else + catch (Exception ex) { - throw new ArgumentNullException("Invalid MCP server configuration!"); + _logger.LogWarning(ex, $"Error when loading mcp client {serverId}"); + return null; } - - return await McpClientFactory.CreateAsync(transport, _mcpSettings.McpClientOptions); } public void Dispose() diff --git a/src/Infrastructure/BotSharp.Core/MCP/Services/McpService.cs b/src/Infrastructure/BotSharp.Core/MCP/Services/McpService.cs index 3bff4442e..27f5326c7 100644 --- a/src/Infrastructure/BotSharp.Core/MCP/Services/McpService.cs +++ b/src/Infrastructure/BotSharp.Core/MCP/Services/McpService.cs @@ -1,6 +1,5 @@ using BotSharp.Core.MCP.Managers; using BotSharp.Core.MCP.Settings; -using Microsoft.Extensions.Logging; using ModelContextProtocol.Client; namespace BotSharp.Core.MCP.Services; @@ -9,35 +8,35 @@ public class McpService : IMcpService { private readonly IServiceProvider _services; private readonly ILogger _logger; - private readonly McpClientManager _mcpClientManager; public McpService( IServiceProvider services, - ILogger logger, - McpClientManager mcpClient) + ILogger logger) { _services = services; _logger = logger; - _mcpClientManager = mcpClient; } - public IEnumerable GetServerConfigs() + public async Task> GetServerConfigsAsync() { + var clientManager = _services.GetService(); + if (clientManager == null) return []; + var options = new List(); var settings = _services.GetRequiredService(); var configs = settings?.McpServerConfigs ?? []; foreach (var config in configs) { - var tools = _mcpClientManager.GetMcpClientAsync(config.Id) - .Result.ListToolsAsync() - .Result.Select(x=> x.Name); + var client = await clientManager.GetMcpClientAsync(config.Id); + if (client == null) continue; + var tools = await client.ListToolsAsync(); options.Add(new McpServerOptionModel { Id = config.Id, Name = config.Name, - Tools = tools + Tools = tools.Select(x => x.Name) }); } diff --git a/src/Infrastructure/BotSharp.Core/MCP/Settings/MCPSettings.cs b/src/Infrastructure/BotSharp.Core/MCP/Settings/MCPSettings.cs index 2867712f9..337230576 100644 --- a/src/Infrastructure/BotSharp.Core/MCP/Settings/MCPSettings.cs +++ b/src/Infrastructure/BotSharp.Core/MCP/Settings/MCPSettings.cs @@ -7,5 +7,4 @@ public class McpSettings public bool Enabled { get; set; } = true; public McpClientOptions McpClientOptions { get; set; } public List McpServerConfigs { get; set; } = []; - } diff --git a/src/Infrastructure/BotSharp.Core/Routing/Executor/DummyFunctionExecutor.cs b/src/Infrastructure/BotSharp.Core/Routing/Executor/DummyFunctionExecutor.cs index d075e1851..91e0bc97e 100644 --- a/src/Infrastructure/BotSharp.Core/Routing/Executor/DummyFunctionExecutor.cs +++ b/src/Infrastructure/BotSharp.Core/Routing/Executor/DummyFunctionExecutor.cs @@ -1,19 +1,19 @@ +using BotSharp.Abstraction.Routing.Executor; using BotSharp.Abstraction.Templating; namespace BotSharp.Core.Routing.Executor; public class DummyFunctionExecutor: IFunctionExecutor { - private FunctionDef functionDef; private readonly IServiceProvider _services; + private readonly FunctionDef _functionDef; - public DummyFunctionExecutor(FunctionDef function, IServiceProvider services) + public DummyFunctionExecutor(IServiceProvider services, FunctionDef functionDef) { - functionDef = function; _services = services; + _functionDef = functionDef; } - public async Task ExecuteAsync(RoleDialogModel message) { var render = _services.GetRequiredService(); @@ -25,7 +25,7 @@ public async Task ExecuteAsync(RoleDialogModel message) dict[item.Key] = item.Value; } - var text = render.Render(functionDef.Output, dict); + var text = render.Render(_functionDef.Output!, dict); message.Content = text; return true; } diff --git a/src/Infrastructure/BotSharp.Core/Routing/Executor/FunctionCallbackExecutor.cs b/src/Infrastructure/BotSharp.Core/Routing/Executor/FunctionCallbackExecutor.cs index 939f4fb35..4b208374f 100644 --- a/src/Infrastructure/BotSharp.Core/Routing/Executor/FunctionCallbackExecutor.cs +++ b/src/Infrastructure/BotSharp.Core/Routing/Executor/FunctionCallbackExecutor.cs @@ -1,23 +1,24 @@ +using BotSharp.Abstraction.Routing.Executor; using BotSharp.Abstraction.Functions; namespace BotSharp.Core.Routing.Executor; public class FunctionCallbackExecutor : IFunctionExecutor { - IFunctionCallback functionCallback; + private readonly IFunctionCallback _functionCallback; public FunctionCallbackExecutor(IFunctionCallback functionCallback) { - this.functionCallback = functionCallback; + _functionCallback = functionCallback; } public async Task ExecuteAsync(RoleDialogModel message) { - return await functionCallback.Execute(message); + return await _functionCallback.Execute(message); } public async Task GetIndicatorAsync(RoleDialogModel message) { - return await functionCallback.GetIndication(message); + return await _functionCallback.GetIndication(message); } } diff --git a/src/Infrastructure/BotSharp.Core/Routing/Executor/FunctionExecutorFactory.cs b/src/Infrastructure/BotSharp.Core/Routing/Executor/FunctionExecutorFactory.cs index 3fb895094..8a4a54865 100644 --- a/src/Infrastructure/BotSharp.Core/Routing/Executor/FunctionExecutorFactory.cs +++ b/src/Infrastructure/BotSharp.Core/Routing/Executor/FunctionExecutorFactory.cs @@ -1,41 +1,31 @@ using BotSharp.Abstraction.Functions; +using BotSharp.Abstraction.Routing.Executor; namespace BotSharp.Core.Routing.Executor; internal class FunctionExecutorFactory { - public static IFunctionExecutor Create(string functionName, Agent agent, IFunctionCallback functioncall, IServiceProvider serviceProvider) + public static IFunctionExecutor? Create(IServiceProvider services, string functionName, Agent agent) { - if(functioncall != null) + var functionCall = services.GetServices().FirstOrDefault(x => x.Name == functionName); + if (functionCall != null) { - return new FunctionCallbackExecutor(functioncall); + return new FunctionCallbackExecutor(functionCall); } - var funDef = agent?.Functions?.FirstOrDefault(x => x.Name == functionName); - if (funDef != null) + var functions = (agent?.Functions ?? []).Concat(agent?.SecondaryFunctions ?? []); + var funcDef = functions.FirstOrDefault(x => x.Name == functionName); + if (!string.IsNullOrWhiteSpace(funcDef?.Output)) { - if (!string.IsNullOrWhiteSpace(funDef?.Output)) - { - return new DummyFunctionExecutor(funDef,serviceProvider); - } + return new DummyFunctionExecutor(services, funcDef); } - else + + var mcpServerId = agent?.McpTools?.Where(x => x.Functions.Any(y => y.Name == funcDef?.Name))?.FirstOrDefault()?.ServerId; + if (!string.IsNullOrWhiteSpace(mcpServerId)) { - funDef = agent?.SecondaryFunctions?.FirstOrDefault(x => x.Name == functionName); - if (funDef != null) - { - if (!string.IsNullOrWhiteSpace(funDef?.Output)) - { - return new DummyFunctionExecutor(funDef, serviceProvider); - } - else - { - var mcpServerId = agent?.McpTools?.Where(x => x.Functions.Any(y => y.Name == funDef.Name)) - .FirstOrDefault().ServerId; - return new MCPToolExecutor(mcpServerId, functionName, serviceProvider); - } - } + return new McpToolExecutor(services, mcpServerId, functionName); } + return null; } } diff --git a/src/Infrastructure/BotSharp.Core/Routing/Executor/MCPToolExecutor.cs b/src/Infrastructure/BotSharp.Core/Routing/Executor/MCPToolExecutor.cs index f7625b485..c452e8066 100644 --- a/src/Infrastructure/BotSharp.Core/Routing/Executor/MCPToolExecutor.cs +++ b/src/Infrastructure/BotSharp.Core/Routing/Executor/MCPToolExecutor.cs @@ -1,21 +1,20 @@ +using BotSharp.Abstraction.Routing.Executor; using BotSharp.Core.MCP.Managers; using ModelContextProtocol.Client; namespace BotSharp.Core.Routing.Executor; -public class MCPToolExecutor: IFunctionExecutor +public class McpToolExecutor: IFunctionExecutor { - private readonly McpClientManager _clientManager; - private string mcpServer; - private string funcName; private readonly IServiceProvider _services; + private readonly string _mcpServerId; + private readonly string _functionName; - public MCPToolExecutor(string mcpserver, string functionName, IServiceProvider services) + public McpToolExecutor(IServiceProvider services, string mcpServerId, string functionName) { _services = services; - this.mcpServer = mcpserver; - this.funcName = functionName; - _clientManager = services.GetRequiredService(); + _mcpServerId = mcpServerId; + _functionName = functionName; } public async Task ExecuteAsync(RoleDialogModel message) @@ -23,12 +22,13 @@ public async Task ExecuteAsync(RoleDialogModel message) try { // Convert arguments to dictionary format expected by mcpdotnet - Dictionary argDict = JsonToDictionary(message.FunctionArgs); + Dictionary argDict = JsonToDictionary(message.FunctionArgs); - var client = await _clientManager.GetMcpClientAsync(mcpServer); + var clientManager = _services.GetRequiredService(); + var client = await clientManager.GetMcpClientAsync(_mcpServerId); // Call the tool through mcpdotnet - var result = await client.CallToolAsync(funcName, !argDict.IsNullOrEmpty() ? argDict : []); + var result = await client.CallToolAsync(_functionName, !argDict.IsNullOrEmpty() ? argDict : []); // Extract the text content from the result var json = string.Join("\n", result.Content.Where(c => c.Type == "text").Select(c => c.Text)); @@ -39,7 +39,7 @@ public async Task ExecuteAsync(RoleDialogModel message) } catch (Exception ex) { - message.Content = $"Error when calling tool {funcName} of MCP server {mcpServer}. {ex.Message}"; + message.Content = $"Error when calling tool {_functionName} of MCP server {_mcpServerId}. {ex.Message}"; return false; } } diff --git a/src/Infrastructure/BotSharp.Core/Routing/RoutingService.InvokeFunction.cs b/src/Infrastructure/BotSharp.Core/Routing/RoutingService.InvokeFunction.cs index 697e2aa67..757549394 100644 --- a/src/Infrastructure/BotSharp.Core/Routing/RoutingService.InvokeFunction.cs +++ b/src/Infrastructure/BotSharp.Core/Routing/RoutingService.InvokeFunction.cs @@ -1,6 +1,4 @@ -using BotSharp.Abstraction.Functions; using BotSharp.Abstraction.Hooks; -using BotSharp.Abstraction.Templating; using BotSharp.Core.Routing.Executor; namespace BotSharp.Core.Routing; @@ -9,14 +7,11 @@ public partial class RoutingService { public async Task InvokeFunction(string name, RoleDialogModel message) { - var function = _services.GetServices().FirstOrDefault(x => x.Name == name); - var currentAgentId = message.CurrentAgentId; var agentService = _services.GetRequiredService(); var agent = await agentService.GetAgent(currentAgentId); - IFunctionExecutor funcExecutor = FunctionExecutorFactory.Create(name, agent, function, _services); - + var funcExecutor = FunctionExecutorFactory.Create(_services, name, agent); if (funcExecutor == null) { message.StopCompletion = true; @@ -25,13 +20,11 @@ public async Task InvokeFunction(string name, RoleDialogModel message) return false; } - // Clone message var clonedMessage = RoleDialogModel.From(message); clonedMessage.FunctionName = name; var progressService = _services.GetService(); - clonedMessage.Indication = await funcExecutor.GetIndicatorAsync(message); if (progressService?.OnFunctionExecuting != null) diff --git a/src/Infrastructure/BotSharp.Core/Session/BotSharpRealtimeSession.cs b/src/Infrastructure/BotSharp.Core/Session/BotSharpRealtimeSession.cs index 0c863b7b7..7f5f6c159 100644 --- a/src/Infrastructure/BotSharp.Core/Session/BotSharpRealtimeSession.cs +++ b/src/Infrastructure/BotSharp.Core/Session/BotSharpRealtimeSession.cs @@ -55,7 +55,7 @@ private ChatSessionUpdate HandleSessionResult(ClientResult result) }; } - public async Task SendEvent(string message) + public async Task SendEventAsync(string message) { if (_websocket.State == WebSocketState.Open) { @@ -64,7 +64,7 @@ public async Task SendEvent(string message) } } - public async Task Disconnect() + public async Task DisconnectAsync() { if (_websocket.State == WebSocketState.Open) { diff --git a/src/Infrastructure/BotSharp.Core/Session/LlmRealtimeSession.cs b/src/Infrastructure/BotSharp.Core/Session/LlmRealtimeSession.cs index 70b8ba096..60ecee044 100644 --- a/src/Infrastructure/BotSharp.Core/Session/LlmRealtimeSession.cs +++ b/src/Infrastructure/BotSharp.Core/Session/LlmRealtimeSession.cs @@ -22,14 +22,17 @@ public LlmRealtimeSession( _sessionOptions = sessionOptions; } - public async Task ConnectAsync(Uri uri, Dictionary headers, CancellationToken cancellationToken = default) + public async Task ConnectAsync(Uri uri, Dictionary? headers = null, CancellationToken cancellationToken = default) { _webSocket?.Dispose(); _webSocket = new ClientWebSocket(); - foreach (var header in headers) + if (!headers.IsNullOrEmpty()) { - _webSocket.Options.SetRequestHeader(header.Key, header.Value); + foreach (var header in headers) + { + _webSocket.Options.SetRequestHeader(header.Key, header.Value); + } } await _webSocket.ConnectAsync(uri, cancellationToken); @@ -68,7 +71,7 @@ private ChatSessionUpdate HandleSessionResult(ClientResult result) }; } - public async Task SendEventToModel(object message) + public async Task SendEventToModelAsync(object message) { if (_webSocket.State != WebSocketState.Open) { @@ -93,7 +96,7 @@ public async Task SendEventToModel(object message) } } - public async Task Disconnect() + public async Task DisconnectAsync() { if (_webSocket.State == WebSocketState.Open) { @@ -103,6 +106,7 @@ public async Task Disconnect() public void Dispose() { + _clientEventSemaphore?.Dispose(); _webSocket?.Dispose(); } } diff --git a/src/Infrastructure/BotSharp.Core/Statistics/Services/BotSharpStatsService.cs b/src/Infrastructure/BotSharp.Core/Statistics/Services/BotSharpStatsService.cs index b5320c45a..8cbf974f8 100644 --- a/src/Infrastructure/BotSharp.Core/Statistics/Services/BotSharpStatsService.cs +++ b/src/Infrastructure/BotSharp.Core/Statistics/Services/BotSharpStatsService.cs @@ -1,4 +1,3 @@ -using BotSharp.Abstraction.Infrastructures; using BotSharp.Abstraction.Statistics.Settings; namespace BotSharp.Core.Statistics.Services; diff --git a/src/Infrastructure/BotSharp.Core/data/agents/01e2fc5c-2c89-4ec7-8470-7688608b496c/functions/get_weather.json b/src/Infrastructure/BotSharp.Core/data/agents/01e2fc5c-2c89-4ec7-8470-7688608b496c/functions/get_weather.json new file mode 100644 index 000000000..0fd0a459b --- /dev/null +++ b/src/Infrastructure/BotSharp.Core/data/agents/01e2fc5c-2c89-4ec7-8470-7688608b496c/functions/get_weather.json @@ -0,0 +1,14 @@ +{ + "name": "get_weather", + "description": "Get weather information for user.", + "parameters": { + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "The city where the user wants to get weather information." + } + }, + "required": [ "city" ] + } +} \ No newline at end of file diff --git a/src/Infrastructure/BotSharp.OpenAPI/Controllers/ConversationController.cs b/src/Infrastructure/BotSharp.OpenAPI/Controllers/ConversationController.cs index 897c34e73..bcb92f740 100644 --- a/src/Infrastructure/BotSharp.OpenAPI/Controllers/ConversationController.cs +++ b/src/Infrastructure/BotSharp.OpenAPI/Controllers/ConversationController.cs @@ -143,55 +143,29 @@ public async Task> GetDialogs([FromRoute] string { var service = _services.GetRequiredService(); var userService = _services.GetRequiredService(); + var settings = _services.GetRequiredService(); + var (isAdmin, user) = await userService.IsAdminUser(_user.Id); - if (user == null) - { - return null; - } var filter = new ConversationFilter { Id = conversationId, - UserId = !isAdmin ? user.Id : null, + UserId = !isAdmin ? user?.Id : null, IsLoadLatestStates = isLoadStates }; - var conversations = await service.GetConversations(filter); - if (conversations.Items.IsNullOrEmpty()) - { - return null; - } - var result = ConversationViewModel.FromSession(conversations.Items.First()); - var state = _services.GetRequiredService(); - user = await userService.GetUser(result.User.Id); - result.User = UserViewModel.FromUser(user); - - return result; - } - - [HttpPost("/conversation/summary")] - public async Task GetConversationSummary([FromBody] ConversationSummaryModel input) - { - var service = _services.GetRequiredService(); - return await service.GetConversationSummary(input.ConversationIds); - } + var conversations = await service.GetConversations(filter); + var conv = !conversations.Items.IsNullOrEmpty() + ? ConversationViewModel.FromSession(conversations.Items.First()) + : new(); - [HttpGet("/conversation/{conversationId}/user")] - public async Task GetConversationUser([FromRoute] string conversationId) - { - var service = _services.GetRequiredService(); - var conversations = await service.GetConversations(new ConversationFilter - { - Id = conversationId - }); + user = !string.IsNullOrEmpty(conv?.User?.Id) + ? await userService.GetUser(conv.User.Id) + : null; - var userService = _services.GetRequiredService(); - var conversation = conversations?.Items?.FirstOrDefault(); - var userId = conversation == null ? _user.Id : conversation.UserId; - var user = await userService.GetUser(userId); if (user == null) { - return new UserViewModel + user = new User { Id = _user.Id, UserName = _user.UserName, @@ -202,7 +176,16 @@ public async Task GetConversationUser([FromRoute] string conversa }; } - return UserViewModel.FromUser(user); + conv.User = UserViewModel.FromUser(user); + conv.IsRealtimeEnabled = settings?.Assemblies?.Contains("BotSharp.Core.Realtime") ?? false; + return conv; + } + + [HttpPost("/conversation/summary")] + public async Task GetConversationSummary([FromBody] ConversationSummaryModel input) + { + var service = _services.GetRequiredService(); + return await service.GetConversationSummary(input.ConversationIds); } [HttpPut("/conversation/{conversationId}/update-title")] diff --git a/src/Infrastructure/BotSharp.OpenAPI/Controllers/McpController.cs b/src/Infrastructure/BotSharp.OpenAPI/Controllers/McpController.cs index 7b74a37ec..6519d0109 100644 --- a/src/Infrastructure/BotSharp.OpenAPI/Controllers/McpController.cs +++ b/src/Infrastructure/BotSharp.OpenAPI/Controllers/McpController.cs @@ -13,9 +13,9 @@ public McpController(IServiceProvider services) } [HttpGet("/mcp/server-configs")] - public IEnumerable GetMcpServerConfigs() + public async Task> GetMcpServerConfigs() { var mcp = _services.GetRequiredService(); - return mcp.GetServerConfigs(); + return await mcp.GetServerConfigsAsync(); } } diff --git a/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Conversations/View/ConversationViewModel.cs b/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Conversations/View/ConversationViewModel.cs index 6300f6612..e7110f44d 100644 --- a/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Conversations/View/ConversationViewModel.cs +++ b/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Conversations/View/ConversationViewModel.cs @@ -1,9 +1,13 @@ using BotSharp.Abstraction.Conversations.Dtos; +using System.Text.Json.Serialization; namespace BotSharp.OpenAPI.ViewModels.Conversations; public class ConversationViewModel : ConversationDto { + [JsonPropertyName("is_realtime_enabled")] + public bool IsRealtimeEnabled { get; set; } + public static ConversationViewModel FromSession(Conversation sess) { return new ConversationViewModel diff --git a/src/Plugins/BotSharp.Plugin.AzureOpenAI/Providers/Chat/ChatCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.AzureOpenAI/Providers/Chat/ChatCompletionProvider.cs index 92b1a6b5f..e2d9a1613 100644 --- a/src/Plugins/BotSharp.Plugin.AzureOpenAI/Providers/Chat/ChatCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.AzureOpenAI/Providers/Chat/ChatCompletionProvider.cs @@ -305,10 +305,10 @@ await onMessageReceived(new RoleDialogModel(choice.Role?.ToString() ?? ChatMessa { messages.Add(new AssistantChatMessage(new List { - ChatToolCall.CreateFunctionToolCall(message.FunctionName, message.FunctionName, BinaryData.FromString(message.FunctionArgs ?? string.Empty)) + ChatToolCall.CreateFunctionToolCall(message.ToolCallId ?? message.FunctionName, message.FunctionName, BinaryData.FromString(message.FunctionArgs ?? "{}")) })); - messages.Add(new ToolChatMessage(message.FunctionName, message.Content)); + messages.Add(new ToolChatMessage(message.ToolCallId ?? message.FunctionName, message.Content)); } else if (message.Role == AgentRole.User) { diff --git a/src/Plugins/BotSharp.Plugin.ChatHub/ChatStreamMiddleware.cs b/src/Plugins/BotSharp.Plugin.ChatHub/ChatStreamMiddleware.cs index b7d1b21a2..ece767f9a 100644 --- a/src/Plugins/BotSharp.Plugin.ChatHub/ChatStreamMiddleware.cs +++ b/src/Plugins/BotSharp.Plugin.ChatHub/ChatStreamMiddleware.cs @@ -94,8 +94,7 @@ private async Task HandleWebSocket(IServiceProvider services, string agentId, st } } - - await _session.Disconnect(); + await _session.DisconnectAsync(); _session.Dispose(); } @@ -105,7 +104,7 @@ await hub.ConnectToModel(async data => { if (_session != null) { - await _session.SendEvent(data); + await _session.SendEventAsync(data); } }); } diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/BotSharp.Plugin.GoogleAI.csproj b/src/Plugins/BotSharp.Plugin.GoogleAI/BotSharp.Plugin.GoogleAI.csproj index c4c28822e..ac68607cc 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/BotSharp.Plugin.GoogleAI.csproj +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/BotSharp.Plugin.GoogleAI.csproj @@ -1,4 +1,4 @@ - + $(TargetFramework) diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeClientPayload.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeClientPayload.cs new file mode 100644 index 000000000..579215952 --- /dev/null +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeClientPayload.cs @@ -0,0 +1,18 @@ +using GenerativeAI.Types; + +namespace BotSharp.Plugin.GoogleAI.Models.Realtime; + +internal class RealtimeClientPayload +{ + [JsonPropertyName("setup")] + public RealtimeGenerateContentSetup? Setup { get; set; } + + [JsonPropertyName("clientContent")] + public BidiGenerateContentClientContent? ClientContent { get; set; } + + [JsonPropertyName("realtimeInput")] + public BidiGenerateContentRealtimeInput? RealtimeInput { get; set; } + + [JsonPropertyName("toolResponse")] + public BidiGenerateContentToolResponse? ToolResponse { get; set; } +} diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeGenerateContentSetup.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeGenerateContentSetup.cs new file mode 100644 index 000000000..c334c7ff8 --- /dev/null +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeGenerateContentSetup.cs @@ -0,0 +1,35 @@ +using GenerativeAI.Types; + +namespace BotSharp.Plugin.GoogleAI.Models.Realtime; + +internal class RealtimeGenerateContentSetup +{ + [JsonPropertyName("model")] + public string? Model { get; set; } + + [JsonPropertyName("generationConfig")] + public GenerationConfig? GenerationConfig { get; set; } + + [JsonPropertyName("systemInstruction")] + public Content? SystemInstruction { get; set; } + + [JsonPropertyName("tools")] + public Tool[]? Tools { get; set; } + + [JsonPropertyName("inputAudioTranscription")] + public AudioTranscriptionConfig? InputAudioTranscription { get; set; } + + [JsonPropertyName("outputAudioTranscription")] + public AudioTranscriptionConfig? OutputAudioTranscription { get; set; } + + [JsonPropertyName("sessionResumption")] + public SessionResumptionConfig? SessionResumption { get; set; } +} + +internal class AudioTranscriptionConfig { } + +internal class SessionResumptionConfig +{ + [JsonPropertyName("handle")] + public string? Handle { get; set; } +} \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeServerResponse.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeServerResponse.cs new file mode 100644 index 000000000..dd6ff508a --- /dev/null +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeServerResponse.cs @@ -0,0 +1,106 @@ +using GenerativeAI.Types; + +namespace BotSharp.Plugin.GoogleAI.Models.Realtime; + +internal class RealtimeServerResponse +{ + [JsonPropertyName("setupComplete")] + public RealtimeGenerateContentSetupComplete? SetupComplete { get; set; } + + [JsonPropertyName("serverContent")] + public RealtimeGenerateContentServerContent? ServerContent { get; set; } + + [JsonPropertyName("usageMetadata")] + public RealtimeUsageMetaData? UsageMetaData { get; set; } + + [JsonPropertyName("toolCall")] + public RealtimeToolCall? ToolCall { get; set; } + + [JsonPropertyName("sessionResumptionUpdate")] + public RealtimeSessionResumptionUpdate? SessionResumptionUpdate { get; set; } +} + + +internal class RealtimeGenerateContentSetupComplete { } + +internal class RealtimeGenerateContentServerContent +{ + [JsonPropertyName("turnComplete")] + public bool? TurnComplete { get; set; } + + [JsonPropertyName("generationComplete")] + public bool? GenerationComplete { get; set; } + + [JsonPropertyName("interrupted")] + public bool? Interrupted { get; set; } + + [JsonPropertyName("modelTurn")] + public Content? ModelTurn { get; set; } + + [JsonPropertyName("inputTranscription")] + public RealtimeGenerateContentTranscription? InputTranscription { get; set; } + + [JsonPropertyName("outputTranscription")] + public RealtimeGenerateContentTranscription? OutputTranscription { get; set; } +} + +internal class RealtimeUsageMetaData +{ + [JsonPropertyName("promptTokenCount")] + public int? PromptTokenCount { get; set; } + + [JsonPropertyName("responseTokenCount")] + public int? ResponseTokenCount { get; set; } + + [JsonPropertyName("totalTokenCount")] + public int? TotalTokenCount { get; set; } + + [JsonPropertyName("promptTokensDetails")] + public List? PromptTokensDetails { get; set; } + + [JsonPropertyName("responseTokensDetails")] + public List? ResponseTokensDetails { get; set; } +} + + +internal class RealtimeTokenDetail +{ + [JsonPropertyName("modality")] + public string? Modality { get; set; } + + [JsonPropertyName("tokenCount")] + public int? TokenCount { get; set; } +} + +internal class RealtimeGenerateContentTranscription +{ + [JsonPropertyName("text")] + public string? Text { get; set; } +} + +internal class RealtimeToolCall +{ + [JsonPropertyName("functionCalls")] + public List? FunctionCalls { get; set; } +} + +internal class RealtimeFunctionCall +{ + [JsonPropertyName("id")] + public string Id { get; set; } + + [JsonPropertyName("name")] + public string Name { get; set; } + + [JsonPropertyName("args")] + public JsonNode? Args { get; set; } +} + +internal class RealtimeSessionResumptionUpdate +{ + [JsonPropertyName("newHandle")] + public string? NewHandle { get; set; } + + [JsonPropertyName("resumable")] + public bool? Resumable { get; set; } +} \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeTranscriptionResponse.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeTranscriptionResponse.cs new file mode 100644 index 000000000..189252fa8 --- /dev/null +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeTranscriptionResponse.cs @@ -0,0 +1,53 @@ +using System.IO; + +namespace BotSharp.Plugin.GoogleAI.Models.Realtime; + +internal class RealtimeTranscriptionResponse : IDisposable +{ + public RealtimeTranscriptionResponse() + { + + } + + private MemoryStream _contentStream = new(); + public Stream? ContentStream + { + get + { + return _contentStream != null ? _contentStream : new MemoryStream(); + } + } + + public void Collect(string text) + { + var binary = BinaryData.FromString(text); + var bytes = binary.ToArray(); + + _contentStream.Position = _contentStream.Length; + _contentStream.Write(bytes, 0, bytes.Length); + _contentStream.Position = 0; + } + + public string GetText() + { + if (_contentStream.Length == 0) + { + return string.Empty; + } + + var bytes = _contentStream.ToArray(); + var text = Encoding.UTF8.GetString(bytes, 0, bytes.Length); + return text; + } + + public void Clear() + { + _contentStream.Position = 0; + _contentStream.SetLength(0); + } + + public void Dispose() + { + _contentStream?.Dispose(); + } +} diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/GeminiChatCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/GeminiChatCompletionProvider.cs index 95dda6c1b..608533623 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/GeminiChatCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/GeminiChatCompletionProvider.cs @@ -271,6 +271,7 @@ public void SetModelName(string model) { FunctionCall = new FunctionCall { + Id = message.ToolCallId, Name = message.FunctionName, Args = JsonNode.Parse(message.FunctionArgs ?? "{}") } @@ -282,6 +283,7 @@ public void SetModelName(string model) { FunctionResponse = new FunctionResponse { + Id = message.ToolCallId, Name = message.FunctionName, Response = new JsonObject() { diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/PalmChatCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/PalmChatCompletionProvider.cs index dde9fa57b..72a47adc0 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/PalmChatCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/PalmChatCompletionProvider.cs @@ -1,7 +1,3 @@ -using BotSharp.Abstraction.Agents; -using BotSharp.Abstraction.Agents.Enums; -using BotSharp.Abstraction.Loggers; -using BotSharp.Abstraction.Functions.Models; using BotSharp.Abstraction.Routing; using LLMSharp.Google.Palm; using LLMSharp.Google.Palm.DiscussService; @@ -9,6 +5,7 @@ namespace BotSharp.Plugin.GoogleAi.Providers.Chat; +[Obsolete] public class PalmChatCompletionProvider : IChatCompletion { private readonly IServiceProvider _services; diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs index 3f4b87539..5df8ee13a 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs @@ -1,9 +1,11 @@ +using System.Threading; using BotSharp.Abstraction.Hooks; +using BotSharp.Abstraction.Realtime.Models.Session; +using BotSharp.Core.Session; +using BotSharp.Plugin.GoogleAI.Models.Realtime; using GenerativeAI; -using GenerativeAI.Core; -using GenerativeAI.Live; -using GenerativeAI.Live.Extensions; using GenerativeAI.Types; +using GenerativeAI.Types.Converters; namespace BotSharp.Plugin.GoogleAi.Providers.Realtime; @@ -13,14 +15,38 @@ public class GoogleRealTimeProvider : IRealTimeCompletion public string Model => _model; private string _model = GoogleAIModels.Gemini2FlashExp; - private MultiModalLiveClient _client; - private GenerativeModel _chatClient; + private readonly IServiceProvider _services; private readonly ILogger _logger; private List renderedInstructions = []; + private LlmRealtimeSession _session; private readonly GoogleAiSettings _settings; + private const string DEFAULT_MIME_TYPE = "audio/pcm;rate=16000"; + private readonly JsonSerializerOptions _jsonOptions = new() + { + PropertyNamingPolicy = JsonNamingPolicy.CamelCase, + PropertyNameCaseInsensitive = true, + Converters = { new JsonStringEnumConverter(), new DateOnlyJsonConverter(), new TimeOnlyJsonConverter() }, + DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull, + UnknownTypeHandling = JsonUnknownTypeHandling.JsonElement + }; + + private RealtimeTranscriptionResponse _inputStream = new(); + private RealtimeTranscriptionResponse _outputStream = new(); + + + private RealtimeHubConnection _conn; + private Func _onModelReady; + private Func _onModelAudioDeltaReceived; + private Func _onModelAudioResponseDone; + private Func _onModelAudioTranscriptDone; + private Func, Task> _onModelResponseDone; + private Func _onConversationItemCreated; + private Func _onInputAudioTranscriptionDone; + private Func _onInterruptionDetected; + public GoogleRealTimeProvider( IServiceProvider services, GoogleAiSettings settings, @@ -36,26 +62,16 @@ public void SetModelName(string model) _model = model; } - private RealtimeHubConnection _conn; - private Action _onModelReady; - private Action _onModelAudioDeltaReceived; - private Action _onModelAudioResponseDone; - private Action _onModelAudioTranscriptDone; - private Action> _onModelResponseDone; - private Action _onConversationItemCreated; - private Action _onInputAudioTranscriptionCompleted; - private Action _onUserInterrupted; - - - public async Task Connect(RealtimeHubConnection conn, - Action onModelReady, - Action onModelAudioDeltaReceived, - Action onModelAudioResponseDone, - Action onModelAudioTranscriptDone, - Action> onModelResponseDone, - Action onConversationItemCreated, - Action onInputAudioTranscriptionCompleted, - Action onUserInterrupted) + public async Task Connect( + RealtimeHubConnection conn, + Func onModelReady, + Func onModelAudioDeltaReceived, + Func onModelAudioResponseDone, + Func onModelAudioTranscriptDone, + Func, Task> onModelResponseDone, + Func onConversationItemCreated, + Func onInputAudioTranscriptionDone, + Func onInterruptionDetected) { _conn = conn; _onModelReady = onModelReady; @@ -64,206 +80,224 @@ public async Task Connect(RealtimeHubConnection conn, _onModelAudioTranscriptDone = onModelAudioTranscriptDone; _onModelResponseDone = onModelResponseDone; _onConversationItemCreated = onConversationItemCreated; - _onInputAudioTranscriptionCompleted = onInputAudioTranscriptionCompleted; - _onUserInterrupted = onUserInterrupted; + _onInputAudioTranscriptionDone = onInputAudioTranscriptionDone; + _onInterruptionDetected = onInterruptionDetected; + var settingsService = _services.GetRequiredService(); var realtimeModelSettings = _services.GetRequiredService(); - _model = realtimeModelSettings.Model; - var client = ProviderHelper.GetGeminiClient(Provider, _model, _services); - _chatClient = client.CreateGenerativeModel(_model); - _client = _chatClient.CreateMultiModalLiveClient( - config: new GenerationConfig - { - ResponseModalities = [Modality.AUDIO], - }, - systemInstruction: "You are a helpful assistant.", - logger: _logger); + _model = realtimeModelSettings.Model; + var modelSettings = settingsService.GetSetting(Provider, _model); - await AttachEvents(_client); + Reset(); + _inputStream = new(); + _outputStream = new(); + _session = new LlmRealtimeSession(_services, new ChatSessionOptions + { + JsonOptions = _jsonOptions + }); - await _client.ConnectAsync(false); + var uri = BuildWebsocketUri(modelSettings.ApiKey, "v1beta"); + await _session.ConnectAsync(uri: uri, cancellationToken: CancellationToken.None); + await onModelReady(); + _ = ReceiveMessage(); } - public async Task Disconnect() + private async Task ReceiveMessage() { - if (_client != null) - await _client.DisconnectAsync(); - } + await foreach (ChatSessionUpdate update in _session.ReceiveUpdatesAsync(CancellationToken.None)) + { + var receivedText = update?.RawResponse; + if (string.IsNullOrEmpty(receivedText)) + { + continue; + } - public async Task AppenAudioBuffer(string message) - { - await _client.SendAudioAsync(Convert.FromBase64String(message)); - } + try + { + var response = JsonSerializer.Deserialize(receivedText, _jsonOptions); + if (response == null) + { + continue; + } - public async Task AppenAudioBuffer(ArraySegment data, int length) - { - var buffer = data.AsSpan(0, length).ToArray(); - await _client.SendAudioAsync(buffer,"audio/pcm;rate=16000"); - } + if (response.SetupComplete != null) + { + _logger.LogInformation($"Session setup completed."); + } + else if (response.SessionResumptionUpdate != null) + { + _logger.LogInformation($"Session resumption update => New handle: {response.SessionResumptionUpdate.NewHandle}, Resumable: {response.SessionResumptionUpdate.Resumable}"); + } + else if (response.ToolCall != null && !response.ToolCall.FunctionCalls.IsNullOrEmpty()) + { + var functionCall = response.ToolCall.FunctionCalls!.First(); - public async Task TriggerModelInference(string? instructions = null) - { - await _client.SendClientContentAsync(new BidiGenerateContentClientContent() - { - TurnComplete = true, - }); - } + _logger.LogInformation($"Tool call received: {functionCall.Name}({functionCall.Args?.ToJsonString(_jsonOptions) ?? string.Empty})."); - public async Task CancelModelResponse() - { - } + if (functionCall != null) + { + var messages = OnFunctionCall(_conn, functionCall); + await _onModelResponseDone(messages); + } + } + else if (response.ServerContent != null) + { + if (response.ServerContent.InputTranscription?.Text != null) + { + _inputStream.Collect(response.ServerContent.InputTranscription.Text); + } - public async Task RemoveConversationItem(string itemId) - { - } + if (response.ServerContent.OutputTranscription?.Text != null) + { + _outputStream.Collect(response.ServerContent.OutputTranscription.Text); + } - private Task AttachEvents(MultiModalLiveClient client) - { - client.Connected += (sender, e) => - { - _logger.LogInformation("Google Realtime Client connected."); - _onModelReady(); - }; + if (response.ServerContent.ModelTurn != null) + { + _logger.LogInformation($"Model audio delta received."); - client.Disconnected += (sender, e) => - { - _logger.LogInformation("Google Realtime Client disconnected."); - }; + // Handle input transcription + var inputTranscription = _inputStream.GetText(); + if (!string.IsNullOrEmpty(inputTranscription)) + { + var message = OnUserAudioTranscriptionCompleted(_conn, inputTranscription); + await _onInputAudioTranscriptionDone(message); + } + _inputStream.Clear(); - client.MessageReceived += async (sender, e) => - { - _logger.LogInformation("User message received."); - if (e.Payload.SetupComplete != null) - { - _onConversationItemCreated(_client.ConnectionId.ToString()); - } + var parts = response.ServerContent.ModelTurn.Parts; + if (!parts.IsNullOrEmpty()) + { + foreach (var part in parts) + { + if (!string.IsNullOrEmpty(part.InlineData?.Data)) + { + await _onModelAudioDeltaReceived(part.InlineData.Data, string.Empty); + } + } + } + } + else if (response.ServerContent.GenerationComplete == true) + { + _logger.LogInformation($"Model generation completed."); + } + else if (response.ServerContent.TurnComplete == true) + { + _logger.LogInformation($"Model turn completed."); - if (e.Payload.ServerContent != null) - { - if (e.Payload.ServerContent.TurnComplete == true) - { - var responseDone = await ResponseDone(_conn, e.Payload.ServerContent); - _onModelResponseDone(responseDone); + // Handle output transcription + var outputTranscription = _outputStream.GetText(); + if (!string.IsNullOrEmpty(outputTranscription)) + { + var messages = await OnResponseDone(_conn, outputTranscription, response.UsageMetaData); + await _onModelResponseDone(messages); + } + _inputStream.Clear(); + _outputStream.Clear(); + } } } - }; + catch (Exception ex) + { + _logger.LogError(ex, $"Error when deserializing server response. {ex.Message}"); + break; + } + } - client.AudioChunkReceived += (sender, e) => - { - _onModelAudioDeltaReceived(Convert.ToBase64String(e.Buffer), Guid.NewGuid().ToString()); - }; + _inputStream.Dispose(); + _outputStream.Dispose(); + _session.Dispose(); + } - client.TextChunkReceived += (sender, e) => - { - _onInputAudioTranscriptionCompleted(new RoleDialogModel(AgentRole.Assistant, e.Text)); - }; - client.GenerationInterrupted += (sender, e) => + public async Task Disconnect() + { + if (_session != null) { - _logger.LogInformation("Audio generation interrupted."); - _onUserInterrupted(); - }; + _inputStream?.Dispose(); + _outputStream?.Dispose(); + await _session.DisconnectAsync(); + _session.Dispose(); + } + } - client.AudioReceiveCompleted += (sender, e) => + public async Task AppenAudioBuffer(string message) + { + await SendEventToModel(new RealtimeClientPayload { - _logger.LogInformation("Audio receive completed."); - _onModelAudioResponseDone(); - }; + RealtimeInput = new() + { + MediaChunks = [new() { Data = message, MimeType = DEFAULT_MIME_TYPE }] + } + }); + } - client.ErrorOccurred += (sender, e) => + public async Task AppenAudioBuffer(ArraySegment data, int length) + { + var buffer = data.AsSpan(0, length).ToArray(); + await SendEventToModel(new RealtimeClientPayload { - var ex = e.GetException(); - _logger.LogError(ex, "Error occurred in Google Realtime Client"); - }; - - return Task.CompletedTask; + RealtimeInput = new() + { + MediaChunks = [new() { Data = Convert.ToBase64String(buffer), MimeType = DEFAULT_MIME_TYPE }] + } + }); } - private async Task> ResponseDone(RealtimeHubConnection conn, - BidiGenerateContentServerContent serverContent) + public async Task TriggerModelInference(string? instructions = null) { - var outputs = new List(); + if (string.IsNullOrWhiteSpace(instructions)) return; - var parts = serverContent.ModelTurn?.Parts; - if (parts != null) + var content = new Content(instructions, AgentRole.User); + await SendEventToModel(new RealtimeClientPayload { - foreach (var part in parts) + ClientContent = new() { - var call = part.FunctionCall; - if (call != null) - { - var item = new RoleDialogModel(AgentRole.Assistant, part.Text) - { - CurrentAgentId = conn.CurrentAgentId, - MessageId = call.Id ?? String.Empty, - MessageType = MessageTypeName.FunctionCall - }; - outputs.Add(item); - } - else - { - var item = new RoleDialogModel(AgentRole.Assistant, call.Args?.ToJsonString() ?? string.Empty) - { - CurrentAgentId = conn.CurrentAgentId, - FunctionName = call.Name, - FunctionArgs = call.Args?.ToJsonString() ?? string.Empty, - ToolCallId = call.Id ?? String.Empty, - MessageId = call.Id ?? String.Empty, - MessageType = MessageTypeName.FunctionCall - }; - outputs.Add(item); - } + Turns = [content], + TurnComplete = true } - } + }); + } - var contentHooks = _services.GetHooks(conn.CurrentAgentId); - // After chat completion hook - foreach (var hook in contentHooks) - { - await hook.AfterGenerated(new RoleDialogModel(AgentRole.Assistant, "response.done") - { - CurrentAgentId = conn.CurrentAgentId - }, new TokenStatsModel - { - Provider = Provider, - Model = _model, - }); - } + public async Task CancelModelResponse() + { + + } + + public async Task RemoveConversationItem(string itemId) + { - return outputs; } public async Task SendEventToModel(object message) { - //todo Send Audio Chunks to Model, Botsharp RealTime Implementation seems to be incomplete + if (_session == null) return; + + await _session.SendEventToModelAsync(message); } public async Task UpdateSession(RealtimeHubConnection conn, bool isInit = false) { - var convService = _services.GetRequiredService(); - var conv = await convService.GetConversation(conn.ConversationId); + if (!isInit) + { + return string.Empty; + } var agentService = _services.GetRequiredService(); - var agent = await agentService.LoadAgent(conn.CurrentAgentId); + var realtimeSetting = _services.GetRequiredService(); - var (prompt, request) = PrepareOptions(_chatClient, agent, new List()); + var agent = await agentService.LoadAgent(conn.CurrentAgentId); + var (prompt, request) = PrepareOptions(agent, []); - var config = request.GenerationConfig; + var config = request.GenerationConfig ?? new(); //Output Modality can either be text or audio - if (config != null) - { - config.ResponseModalities = new List([Modality.AUDIO]); - - var words = new List(); - HookEmitter.Emit(_services, hook => words.AddRange(hook.OnModelTranscriptPrompt(agent)), agent.Id); + config.ResponseModalities = [Modality.AUDIO]; + config.Temperature = Math.Max(realtimeSetting.Temperature, 0.6f); + config.MaxOutputTokens = realtimeSetting.MaxResponseOutputTokens; - var realtimeModelSettings = _services.GetRequiredService(); - - config.Temperature = Math.Max(realtimeModelSettings.Temperature, 0.6f); - config.MaxOutputTokens = realtimeModelSettings.MaxResponseOutputTokens; - } - + var words = new List(); + HookEmitter.Emit(_services, hook => words.AddRange(hook.OnModelTranscriptPrompt(agent)), agent.Id); var functions = request.Tools?.SelectMany(s => s.FunctionDeclarations).Select(x => { @@ -271,10 +305,10 @@ public async Task UpdateSession(RealtimeHubConnection conn, bool isInit { Name = x.Name ?? string.Empty, Description = x.Description ?? string.Empty, + Parameters = x.Parameters != null + ? JsonSerializer.Deserialize(JsonSerializer.Serialize(x.Parameters)) + : null }; - fn.Parameters = x.Parameters != null - ? JsonSerializer.Deserialize(JsonSerializer.Serialize(x.Parameters)) - : null; return fn; }).ToArray(); @@ -283,86 +317,146 @@ await HookEmitter.Emit(_services, if (_settings.Gemini.UseGoogleSearch) { - if (request.Tools == null) - request.Tools = new List(); + request.Tools ??= []; request.Tools.Add(new Tool() { GoogleSearch = new GoogleSearchTool() }); } - // if(request.Tools.Count == 0) - // request.Tools = null; - // config.MaxOutputTokens = null; - - await _client.SendSetupAsync(new BidiGenerateContentSetup() + var payload = new RealtimeClientPayload { - GenerationConfig = config, - Model = Model.ToModelId(), - SystemInstruction = request.SystemInstruction, - Tools = request.Tools?.ToArray(), - }); + Setup = new RealtimeGenerateContentSetup() + { + GenerationConfig = config, + Model = Model.ToModelId(), + SystemInstruction = request.SystemInstruction, + Tools = request.Tools?.ToArray(), + InputAudioTranscription = realtimeSetting.InputAudioTranscribe ? new() : null, + OutputAudioTranscription = realtimeSetting.InputAudioTranscribe ? new() : null + } + }; + + _logger.LogInformation($"Setup payload: {JsonSerializer.Serialize(payload, _jsonOptions)}"); + await SendEventToModel(payload); return prompt; } public async Task InsertConversationItem(RoleDialogModel message) { - if (_client == null) - throw new Exception("Client is not initialized"); if (message.Role == AgentRole.Function) { var function = new FunctionResponse() { + Id = message.ToolCallId, Name = message.FunctionName ?? string.Empty, - Response = JsonNode.Parse(message.Content ?? "{}") + Response = new JsonObject() + { + ["result"] = message.Content ?? string.Empty + } }; - await _client.SendToolResponseAsync(new BidiGenerateContentToolResponse() + await SendEventToModel(new RealtimeClientPayload { - FunctionResponses = [function] + ToolResponse = new() + { + FunctionResponses = [function] + } }); } else if (message.Role == AgentRole.Assistant) { + await SendEventToModel(new RealtimeClientPayload + { + ClientContent = new() + { + Turns = [new Content(message.Content, AgentRole.Model)], + TurnComplete = true + } + }); } else if (message.Role == AgentRole.User) { - await _client.SentTextAsync(message.Content); + await SendEventToModel(new RealtimeClientPayload + { + ClientContent = new() + { + Turns = [new Content(message.Content, AgentRole.User)], + TurnComplete = true + } + }); } else { - throw new NotImplementedException(""); + throw new NotImplementedException($"Unrecognized role {message.Role}."); } } - public Task> OnResponsedDone(RealtimeHubConnection conn, string response) + #region Private methods + private List OnFunctionCall(RealtimeHubConnection conn, RealtimeFunctionCall functionCall) { - throw new NotImplementedException(""); + var outputs = new List + { + new(AgentRole.Assistant, string.Empty) + { + CurrentAgentId = conn.CurrentAgentId, + FunctionName = functionCall.Name, + FunctionArgs = functionCall.Args?.ToJsonString(_jsonOptions), + ToolCallId = functionCall.Id, + MessageType = MessageTypeName.FunctionCall + } + }; + + return outputs; } - public Task OnConversationItemCreated(RealtimeHubConnection conn, string response) + private async Task> OnResponseDone(RealtimeHubConnection conn, string text, RealtimeUsageMetaData? usage) { - return Task.FromResult(new RoleDialogModel(AgentRole.User, response)); + var outputs = new List + { + new(AgentRole.Assistant, text) + { + CurrentAgentId = conn.CurrentAgentId, + MessageId = Guid.NewGuid().ToString(), + MessageType = MessageTypeName.Plain + } + }; + + if (usage != null) + { + var contentHooks = _services.GetHooks(conn.CurrentAgentId); + foreach (var hook in contentHooks) + { + await hook.AfterGenerated(new RoleDialogModel(AgentRole.Assistant, text) + { + CurrentAgentId = conn.CurrentAgentId + }, + new TokenStatsModel + { + Provider = Provider, + Model = _model, + Prompt = text, + TextInputTokens = usage.PromptTokensDetails?.FirstOrDefault(x => x.Modality == Modality.TEXT.ToString())?.TokenCount ?? 0, + AudioInputTokens = usage.PromptTokensDetails?.FirstOrDefault(x => x.Modality == Modality.AUDIO.ToString())?.TokenCount ?? 0, + TextOutputTokens = usage.ResponseTokensDetails?.FirstOrDefault(x => x.Modality == Modality.TEXT.ToString())?.TokenCount ?? 0, + AudioOutputTokens = usage.ResponseTokensDetails?.FirstOrDefault(x => x.Modality == Modality.AUDIO.ToString())?.TokenCount ?? 0 + }); + } + } + + return outputs; } - private (string, GenerateContentRequest) PrepareOptions(GenerativeModel aiModel, Agent agent, + + private (string, GenerateContentRequest) PrepareOptions(Agent agent, List conversations) { var agentService = _services.GetRequiredService(); var googleSettings = _settings; renderedInstructions = []; - // Add settings - aiModel.UseGoogleSearch = googleSettings.Gemini.UseGoogleSearch; - aiModel.UseGrounding = googleSettings.Gemini.UseGrounding; - - aiModel.FunctionCallingBehaviour = new FunctionCallingBehaviour() - { - AutoCallFunction = false - }; - // Assembly messages var contents = new List(); var tools = new List(); @@ -418,6 +512,7 @@ public Task OnConversationItemCreated(RealtimeHubConnection con { FunctionCall = new FunctionCall { + Id = message.ToolCallId, Name = message.FunctionName, Args = JsonNode.Parse(message.FunctionArgs ?? "{}") } @@ -429,6 +524,7 @@ public Task OnConversationItemCreated(RealtimeHubConnection con { FunctionResponse = new FunctionResponse { + Id = message.ToolCallId, Name = message.FunctionName ?? string.Empty, Response = new JsonObject() { @@ -459,6 +555,7 @@ public Task OnConversationItemCreated(RealtimeHubConnection con var maxTokens = int.TryParse(state.GetState("max_tokens"), out var tokens) ? tokens : agent.LlmConfig?.MaxOutputTokens ?? LlmConstant.DEFAULT_MAX_OUTPUT_TOKEN; + var request = new GenerateContentRequest { SystemInstruction = !systemPrompts.IsNullOrEmpty() @@ -496,4 +593,29 @@ private string GetPrompt(IEnumerable systemPrompts, IEnumerable return prompt; } + + + private RoleDialogModel OnUserAudioTranscriptionCompleted(RealtimeHubConnection conn, string text) + { + return new RoleDialogModel(AgentRole.User, text) + { + CurrentAgentId = conn.CurrentAgentId + }; + } + + private Uri BuildWebsocketUri(string apiKey, string version = "v1beta") + { + return new Uri($"wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.{version}.GenerativeService.BidiGenerateContent?key={apiKey}"); + } + + private void Reset() + { + _inputStream?.Clear(); + _outputStream?.Clear(); + + _inputStream?.Dispose(); + _outputStream?.Dispose(); + _session?.Dispose(); + } + #endregion } \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Text/PalmTextCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Text/PalmTextCompletionProvider.cs index b75116d99..14d3ebab9 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Text/PalmTextCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Text/PalmTextCompletionProvider.cs @@ -1,10 +1,8 @@ -using BotSharp.Abstraction.Agents.Enums; -using BotSharp.Abstraction.Conversations; using BotSharp.Abstraction.Hooks; -using BotSharp.Abstraction.Loggers; namespace BotSharp.Plugin.GoogleAi.Providers.Text; +[Obsolete] public class PalmTextCompletionProvider : ITextCompletion { private readonly IServiceProvider _services; diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Using.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Using.cs index a4e6606e0..daff1e0b0 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Using.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Using.cs @@ -16,14 +16,15 @@ global using BotSharp.Abstraction.Agents.Models; global using BotSharp.Abstraction.MLTasks; global using BotSharp.Abstraction.Utilities; -global using BotSharp.Plugin.GoogleAi.Settings; global using BotSharp.Abstraction.Realtime; global using BotSharp.Abstraction.Realtime.Models; global using BotSharp.Core.Infrastructures; -global using BotSharp.Plugin.GoogleAi.Providers.Chat; global using BotSharp.Abstraction.Agents; global using BotSharp.Abstraction.Agents.Enums; global using BotSharp.Abstraction.Conversations; global using BotSharp.Abstraction.Conversations.Enums; global using BotSharp.Abstraction.Functions.Models; -global using BotSharp.Abstraction.Loggers; \ No newline at end of file +global using BotSharp.Abstraction.Loggers; + +global using BotSharp.Plugin.GoogleAi.Settings; +global using BotSharp.Plugin.GoogleAi.Providers.Chat; \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.MongoStorage/Models/DialogMongoElement.cs b/src/Plugins/BotSharp.Plugin.MongoStorage/Models/DialogMongoElement.cs index 030a93abb..b67fc53cb 100644 --- a/src/Plugins/BotSharp.Plugin.MongoStorage/Models/DialogMongoElement.cs +++ b/src/Plugins/BotSharp.Plugin.MongoStorage/Models/DialogMongoElement.cs @@ -47,6 +47,8 @@ public class DialogMetaDataMongoElement public string MessageId { get; set; } = default!; public string MessageType { get; set; } = default!; public string? FunctionName { get; set; } + public string? FunctionArgs { get; set; } + public string? ToolCallId { get; set; } public string? SenderId { get; set; } public DateTime CreateTime { get; set; } @@ -59,6 +61,8 @@ public static DialogMetaData ToDomainElement(DialogMetaDataMongoElement meta) MessageId = meta.MessageId, MessageType = meta.MessageType, FunctionName = meta.FunctionName, + FunctionArgs = meta.FunctionArgs, + ToolCallId = meta.ToolCallId, SenderId = meta.SenderId, CreatedTime = meta.CreateTime, }; @@ -73,6 +77,8 @@ public static DialogMetaDataMongoElement ToMongoElement(DialogMetaData meta) MessageId = meta.MessageId, MessageType = meta.MessageType, FunctionName = meta.FunctionName, + FunctionArgs = meta.FunctionArgs, + ToolCallId = meta.ToolCallId, SenderId = meta.SenderId, CreateTime = meta.CreatedTime, }; diff --git a/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.Conversation.cs b/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.Conversation.cs index 722c77303..02f5a6951 100644 --- a/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.Conversation.cs +++ b/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.Conversation.cs @@ -1,6 +1,5 @@ using BotSharp.Abstraction.Conversations.Models; using BotSharp.Abstraction.Repositories.Filters; -using MongoDB.Driver; using System.Text.Json; namespace BotSharp.Plugin.MongoStorage.Repository; diff --git a/src/Plugins/BotSharp.Plugin.OpenAI/Models/Realtime/RealtimeSessionBody.cs b/src/Plugins/BotSharp.Plugin.OpenAI/Models/Realtime/RealtimeSessionBody.cs index a5ed764e0..f767a6f16 100644 --- a/src/Plugins/BotSharp.Plugin.OpenAI/Models/Realtime/RealtimeSessionBody.cs +++ b/src/Plugins/BotSharp.Plugin.OpenAI/Models/Realtime/RealtimeSessionBody.cs @@ -76,7 +76,7 @@ public class RealtimeSessionTurnDetection public string Type { get; set; } = "semantic_vad"; [JsonPropertyName("eagerness")] - public string eagerness { get;set; } = "auto"; + public string Eagerness { get;set; } = "auto"; } public class InputAudioTranscription diff --git a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Chat/ChatCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Chat/ChatCompletionProvider.cs index 95d4b909f..c0a9c0d43 100644 --- a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Chat/ChatCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Chat/ChatCompletionProvider.cs @@ -1,4 +1,3 @@ -using BotSharp.Abstraction.Agents.Models; using BotSharp.Abstraction.Hooks; using OpenAI.Chat; @@ -273,10 +272,10 @@ await onMessageReceived(new RoleDialogModel(choice.Role?.ToString() ?? ChatMessa { messages.Add(new AssistantChatMessage(new List { - ChatToolCall.CreateFunctionToolCall(message.FunctionName, message.FunctionName, BinaryData.FromString(message.FunctionArgs ?? string.Empty)) + ChatToolCall.CreateFunctionToolCall(message.ToolCallId ?? message.FunctionName, message.FunctionName, BinaryData.FromString(message.FunctionArgs ?? "{}")) })); - messages.Add(new ToolChatMessage(message.FunctionName, message.Content)); + messages.Add(new ToolChatMessage(message.ToolCallId ?? message.FunctionName, message.Content)); } else if (message.Role == AgentRole.User) { diff --git a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs index 61e7dddc6..52f634472 100644 --- a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs @@ -31,26 +31,22 @@ public RealTimeCompletionProvider( public async Task Connect( RealtimeHubConnection conn, - Action onModelReady, - Action onModelAudioDeltaReceived, - Action onModelAudioResponseDone, - Action onModelAudioTranscriptDone, - Action> onModelResponseDone, - Action onConversationItemCreated, - Action onInputAudioTranscriptionCompleted, - Action onInterruptionDetected) + Func onModelReady, + Func onModelAudioDeltaReceived, + Func onModelAudioResponseDone, + Func onModelAudioTranscriptDone, + Func, Task> onModelResponseDone, + Func onConversationItemCreated, + Func onInputAudioTranscriptionDone, + Func onInterruptionDetected) { var settingsService = _services.GetRequiredService(); - var realtimeModelSettings = _services.GetRequiredService(); + var realtimeSettings = _services.GetRequiredService(); - _model = realtimeModelSettings.Model; + _model = realtimeSettings.Model; var settings = settingsService.GetSetting(Provider, _model); - if (_session != null) - { - _session.Dispose(); - } - + _session?.Dispose(); _session = new LlmRealtimeSession(_services, new ChatSessionOptions { JsonOptions = _botsharpOptions.JsonSerializerOptions @@ -66,6 +62,7 @@ await _session.ConnectAsync( cancellationToken: CancellationToken.None); _ = ReceiveMessage( + realtimeSettings, conn, onModelReady, onModelAudioDeltaReceived, @@ -73,87 +70,24 @@ await _session.ConnectAsync( onModelAudioTranscriptDone, onModelResponseDone, onConversationItemCreated, - onInputAudioTranscriptionCompleted, + onInputAudioTranscriptionDone, onInterruptionDetected); } - public async Task Disconnect() - { - if (_session != null) - { - await _session.Disconnect(); - _session.Dispose(); - } - } - - public async Task AppenAudioBuffer(string message) - { - var audioAppend = new - { - type = "input_audio_buffer.append", - audio = message - }; - - await SendEventToModel(audioAppend); - } - - public async Task AppenAudioBuffer(ArraySegment data, int length) - { - var message = Convert.ToBase64String(data.AsSpan(0, length).ToArray()); - await AppenAudioBuffer(message); - } - - public async Task TriggerModelInference(string? instructions = null) - { - // Triggering model inference - if (!string.IsNullOrEmpty(instructions)) - { - await SendEventToModel(new - { - type = "response.create", - response = new - { - instructions - } - }); - } - else - { - await SendEventToModel(new - { - type = "response.create" - }); - } - } - - public async Task CancelModelResponse() - { - await SendEventToModel(new - { - type = "response.cancel" - }); - } - - public async Task RemoveConversationItem(string itemId) - { - await SendEventToModel(new - { - type = "conversation.item.delete", - item_id = itemId - }); - } - private async Task ReceiveMessage( + RealtimeModelSettings realtimeSettings, RealtimeHubConnection conn, - Action onModelReady, - Action onModelAudioDeltaReceived, - Action onModelAudioResponseDone, - Action onModelAudioTranscriptDone, - Action> onModelResponseDone, - Action onConversationItemCreated, - Action onUserAudioTranscriptionCompleted, - Action onInterruptionDetected) + Func onModelReady, + Func onModelAudioDeltaReceived, + Func onModelAudioResponseDone, + Func onModelAudioTranscriptDone, + Func, Task> onModelResponseDone, + Func onConversationItemCreated, + Func onInputAudioTranscriptionDone, + Func onInterruptionDetected) { + DateTime? startTime = null; + await foreach (ChatSessionUpdate update in _session.ReceiveUpdatesAsync(CancellationToken.None)) { var receivedText = update?.RawResponse; @@ -164,6 +98,17 @@ private async Task ReceiveMessage( var response = JsonSerializer.Deserialize(receivedText); + if (realtimeSettings?.ModelResponseTimeoutSeconds > 0 + && !string.IsNullOrWhiteSpace(realtimeSettings?.ModelResponseTimeoutEndEvent) + && startTime.HasValue + && (DateTime.UtcNow - startTime.Value).TotalSeconds >= realtimeSettings.ModelResponseTimeoutSeconds + && response.Type != realtimeSettings.ModelResponseTimeoutEndEvent) + { + startTime = null; + await TriggerModelInference("Responsd to user immediately"); + continue; + } + if (response.Type == "error") { _logger.LogError($"{response.Type}: {receivedText}"); @@ -176,7 +121,7 @@ private async Task ReceiveMessage( else if (response.Type == "session.created") { _logger.LogInformation($"{response.Type}: {receivedText}"); - onModelReady(); + await onModelReady(); } else if (response.Type == "session.updated") { @@ -190,7 +135,7 @@ private async Task ReceiveMessage( { _logger.LogInformation($"{response.Type}: {receivedText}"); var data = JsonSerializer.Deserialize(receivedText); - onModelAudioTranscriptDone(data.Transcript); + await onModelAudioTranscriptDone(data.Transcript); } else if (response.Type == "response.audio.delta") { @@ -198,13 +143,13 @@ private async Task ReceiveMessage( if (audio?.Delta != null) { _logger.LogDebug($"{response.Type}: {receivedText}"); - onModelAudioDeltaReceived(audio.Delta, audio.ItemId); + await onModelAudioDeltaReceived(audio.Delta, audio.ItemId); } } else if (response.Type == "response.audio.done") { _logger.LogInformation($"{response.Type}: {receivedText}"); - onModelAudioResponseDone(); + await onModelAudioResponseDone(); } else if (response.Type == "response.done") { @@ -214,14 +159,14 @@ private async Task ReceiveMessage( { if (data.StatusDetails.Type == "incomplete" && data.StatusDetails.Reason == "max_output_tokens") { - onInterruptionDetected(); + await onInterruptionDetected(); await TriggerModelInference("Response user concisely"); } } else { var messages = await OnResponsedDone(conn, receivedText); - onModelResponseDone(messages); + await onModelResponseDone(messages); } } else if (response.Type == "conversation.item.created") @@ -229,7 +174,12 @@ private async Task ReceiveMessage( _logger.LogInformation($"{response.Type}: {receivedText}"); var data = JsonSerializer.Deserialize(receivedText); - onConversationItemCreated(receivedText); + if (data?.Item?.Role == "user") + { + startTime = DateTime.UtcNow; + } + + await onConversationItemCreated(receivedText); } else if (response.Type == "conversation.item.input_audio_transcription.completed") { @@ -238,14 +188,14 @@ private async Task ReceiveMessage( var message = await OnUserAudioTranscriptionCompleted(conn, receivedText); if (!string.IsNullOrEmpty(message.Content)) { - onUserAudioTranscriptionCompleted(message); + await onInputAudioTranscriptionDone(message); } } else if (response.Type == "input_audio_buffer.speech_started") { _logger.LogInformation($"{response.Type}: {receivedText}"); // Handle user interuption - onInterruptionDetected(); + await onInterruptionDetected(); } else if (response.Type == "input_audio_buffer.speech_stopped") { @@ -260,19 +210,85 @@ private async Task ReceiveMessage( _session.Dispose(); } + public async Task Disconnect() + { + if (_session != null) + { + await _session.DisconnectAsync(); + _session.Dispose(); + } + } + + public async Task AppenAudioBuffer(string message) + { + var audioAppend = new + { + type = "input_audio_buffer.append", + audio = message + }; + + await SendEventToModel(audioAppend); + } + + public async Task AppenAudioBuffer(ArraySegment data, int length) + { + var message = Convert.ToBase64String(data.AsSpan(0, length).ToArray()); + await AppenAudioBuffer(message); + } + + public async Task TriggerModelInference(string? instructions = null) + { + // Triggering model inference + if (!string.IsNullOrEmpty(instructions)) + { + await SendEventToModel(new + { + type = "response.create", + response = new + { + instructions + } + }); + } + else + { + await SendEventToModel(new + { + type = "response.create" + }); + } + } + + public async Task CancelModelResponse() + { + await SendEventToModel(new + { + type = "response.cancel" + }); + } + + public async Task RemoveConversationItem(string itemId) + { + await SendEventToModel(new + { + type = "conversation.item.delete", + item_id = itemId + }); + } + public async Task SendEventToModel(object message) { if (_session == null) return; - await _session.SendEventToModel(message); + await _session.SendEventToModelAsync(message); } public async Task UpdateSession(RealtimeHubConnection conn, bool isInit = false) { var convService = _services.GetRequiredService(); - var conv = await convService.GetConversation(conn.ConversationId); - var agentService = _services.GetRequiredService(); + + var conv = await convService.GetConversation(conn.ConversationId); var agent = await agentService.LoadAgent(conn.CurrentAgentId); var (prompt, messages, options) = PrepareOptions(agent, []); @@ -403,11 +419,105 @@ public async Task InsertConversationItem(RoleDialogModel message) } else { - throw new NotImplementedException(""); + throw new NotImplementedException($"Unrecognized role {message.Role}."); } } - protected (string, IEnumerable, ChatCompletionOptions) PrepareOptions(Agent agent, List conversations) + + public void SetModelName(string model) + { + _model = model; + } + + #region Private methods + private async Task> OnResponsedDone(RealtimeHubConnection conn, string response) + { + var outputs = new List(); + + var data = JsonSerializer.Deserialize(response).Body; + if (data.Status != "completed") + { + _logger.LogError(data.StatusDetails.ToString()); + /*if (data.StatusDetails.Type == "incomplete" && data.StatusDetails.Reason == "max_output_tokens") + { + await TriggerModelInference("Response user concisely"); + }*/ + return []; + } + + var prompts = new List(); + var inputTokenDetails = data.Usage?.InputTokenDetails; + var outputTokenDetails = data.Usage?.OutputTokenDetails; + + foreach (var output in data.Outputs) + { + if (output.Type == "function_call") + { + outputs.Add(new RoleDialogModel(AgentRole.Assistant, output.Arguments) + { + CurrentAgentId = conn.CurrentAgentId, + FunctionName = output.Name, + FunctionArgs = output.Arguments, + ToolCallId = output.CallId, + MessageId = output.Id, + MessageType = MessageTypeName.FunctionCall + }); + + prompts.Add($"{output.Name}({output.Arguments})"); + } + else if (output.Type == "message") + { + var content = output.Content.FirstOrDefault()?.Transcript ?? string.Empty; + + outputs.Add(new RoleDialogModel(output.Role, content) + { + CurrentAgentId = conn.CurrentAgentId, + MessageId = output.Id, + MessageType = MessageTypeName.Plain + }); + + prompts.Add(content); + } + } + + + // After chat completion hook + var text = string.Join("\r\n", prompts); + var contentHooks = _services.GetHooks(conn.CurrentAgentId); + + foreach (var hook in contentHooks) + { + await hook.AfterGenerated(new RoleDialogModel(AgentRole.Assistant, text) + { + CurrentAgentId = conn.CurrentAgentId + }, + new TokenStatsModel + { + Provider = Provider, + Model = _model, + Prompt = text, + TextInputTokens = inputTokenDetails?.TextTokens ?? 0 - inputTokenDetails?.CachedTokenDetails?.TextTokens ?? 0, + CachedTextInputTokens = data.Usage?.InputTokenDetails?.CachedTokenDetails?.TextTokens ?? 0, + AudioInputTokens = inputTokenDetails?.AudioTokens ?? 0 - inputTokenDetails?.CachedTokenDetails?.AudioTokens ?? 0, + CachedAudioInputTokens = inputTokenDetails?.CachedTokenDetails?.AudioTokens ?? 0, + TextOutputTokens = outputTokenDetails?.TextTokens ?? 0, + AudioOutputTokens = outputTokenDetails?.AudioTokens ?? 0 + }); + } + + return outputs; + } + + private async Task OnUserAudioTranscriptionCompleted(RealtimeHubConnection conn, string response) + { + var data = JsonSerializer.Deserialize(response); + return new RoleDialogModel(AgentRole.User, data.Transcript) + { + CurrentAgentId = conn.CurrentAgentId + }; + } + + private (string, IEnumerable, ChatCompletionOptions) PrepareOptions(Agent agent, List conversations) { var agentService = _services.GetRequiredService(); var state = _services.GetRequiredService(); @@ -589,103 +699,5 @@ private string GetPrompt(IEnumerable messages, ChatCompletionOption return prompt; } - - public void SetModelName(string model) - { - _model = model; - } - - public async Task> OnResponsedDone(RealtimeHubConnection conn, string response) - { - var outputs = new List(); - - var data = JsonSerializer.Deserialize(response).Body; - if (data.Status != "completed") - { - _logger.LogError(data.StatusDetails.ToString()); - /*if (data.StatusDetails.Type == "incomplete" && data.StatusDetails.Reason == "max_output_tokens") - { - await TriggerModelInference("Response user concisely"); - }*/ - return []; - } - - var contentHooks = _services.GetHooks(conn.CurrentAgentId); - - var prompts = new List(); - var inputTokenDetails = data.Usage?.InputTokenDetails; - var outputTokenDetails = data.Usage?.OutputTokenDetails; - - foreach (var output in data.Outputs) - { - if (output.Type == "function_call") - { - outputs.Add(new RoleDialogModel(AgentRole.Assistant, output.Arguments) - { - CurrentAgentId = conn.CurrentAgentId, - FunctionName = output.Name, - FunctionArgs = output.Arguments, - ToolCallId = output.CallId, - MessageId = output.Id, - MessageType = MessageTypeName.FunctionCall - }); - - prompts.Add($"{output.Name}({output.Arguments})"); - } - else if (output.Type == "message") - { - var content = output.Content.FirstOrDefault()?.Transcript ?? string.Empty; - - outputs.Add(new RoleDialogModel(output.Role, content) - { - CurrentAgentId = conn.CurrentAgentId, - MessageId = output.Id, - MessageType = MessageTypeName.Plain - }); - - prompts.Add(content); - } - } - - var text = string.Join("\r\n", prompts); - // After chat completion hook - foreach (var hook in contentHooks) - { - await hook.AfterGenerated(new RoleDialogModel(AgentRole.Assistant, text) - { - CurrentAgentId = conn.CurrentAgentId - }, - new TokenStatsModel - { - Provider = Provider, - Model = _model, - Prompt = text, - TextInputTokens = inputTokenDetails?.TextTokens ?? 0 - inputTokenDetails?.CachedTokenDetails?.TextTokens ?? 0, - CachedTextInputTokens = data.Usage?.InputTokenDetails?.CachedTokenDetails?.TextTokens ?? 0, - AudioInputTokens = inputTokenDetails?.AudioTokens ?? 0 - inputTokenDetails?.CachedTokenDetails?.AudioTokens ?? 0, - CachedAudioInputTokens = inputTokenDetails?.CachedTokenDetails?.AudioTokens ?? 0, - TextOutputTokens = outputTokenDetails?.TextTokens ?? 0, - AudioOutputTokens = outputTokenDetails?.AudioTokens ?? 0 - }); - } - - return outputs; - } - - public async Task OnUserAudioTranscriptionCompleted(RealtimeHubConnection conn, string response) - { - var data = JsonSerializer.Deserialize(response); - return new RoleDialogModel(AgentRole.User, data.Transcript) - { - CurrentAgentId = conn.CurrentAgentId - }; - } - - public async Task OnConversationItemCreated(RealtimeHubConnection conn, string response) - { - var item = response.JsonContent().Item; - var message = new RoleDialogModel(item.Role, item.Content.FirstOrDefault()?.Transcript); - - return message; - } + #endregion } \ No newline at end of file diff --git a/src/WebStarter/appsettings.json b/src/WebStarter/appsettings.json index b48d2963a..a1d4ed53f 100644 --- a/src/WebStarter/appsettings.json +++ b/src/WebStarter/appsettings.json @@ -268,13 +268,13 @@ } }, "McpServerConfigs": [ - { - "Id": "PizzaServer", - "Name": "PizzaServer", - "SseConfig": { - "Endpoint": "http://localhost:58905/sse" - } - } + //{ + // "Id": "PizzaServer", + // "Name": "PizzaServer", + // "SseConfig": { + // "Endpoint": "http://localhost:58905/sse" + // } + //} ] }, @@ -502,7 +502,6 @@ "BotSharp.Core.SideCar", "BotSharp.Core.Crontab", "BotSharp.Core.Realtime", - "BotSharp.Core.MCP", "BotSharp.Logger", "BotSharp.Plugin.MongoStorage", "BotSharp.Plugin.Dashboard", diff --git a/tests/BotSharp.LLM.Tests/GoogleRealTimeTests.cs b/tests/BotSharp.LLM.Tests/GoogleRealTimeTests.cs index 8c9eac4cf..061f05454 100644 --- a/tests/BotSharp.LLM.Tests/GoogleRealTimeTests.cs +++ b/tests/BotSharp.LLM.Tests/GoogleRealTimeTests.cs @@ -40,11 +40,16 @@ public async Task ShouldConnect_Tests() var realTimeCompleter = services.BuildServiceProvider().GetService(); realTimeCompleter.SetModelName(GoogleAIModels.Gemini2FlashExp); bool modelReady = false; - await realTimeCompleter.Connect(new RealtimeHubConnection(), () => { modelReady = true; }, - (s, s1) => { Console.WriteLine(s); }, () => { }, (s) => { Console.WriteLine(s); }, - (list => { Console.WriteLine(list); }), - (s => { Console.WriteLine(s); }), - (model => { Console.WriteLine(model); }), (() => { Console.WriteLine("UserInterrupted"); })); + await realTimeCompleter.Connect( + new RealtimeHubConnection(), + async () => { modelReady = true; }, + async (s, s1) => { Console.WriteLine(s); }, + async () => { }, + async (s) => { Console.WriteLine(s); }, + async list => { Console.WriteLine(list); }, + async s => { Console.WriteLine(s); }, + async model => { Console.WriteLine(model); }, + async () => { Console.WriteLine("UserInterrupted"); }); Thread.Sleep(1000); modelReady.ShouldBeTrue(); diff --git a/tests/BotSharp.Test.RealtimeVoice/appsettings.json b/tests/BotSharp.Test.RealtimeVoice/appsettings.json index e0ffcb8c4..24b852668 100644 --- a/tests/BotSharp.Test.RealtimeVoice/appsettings.json +++ b/tests/BotSharp.Test.RealtimeVoice/appsettings.json @@ -16,9 +16,14 @@ "Version": "2024-12-17", "ApiKey": "", "Type": "realtime", - "MultiModal": true, - "PromptCost": 0.0025, - "CompletionCost": 0.01 + "Cost": { + "TextInputCost": 0.0006, + "CachedTextInputCost": 0.0003, + "AudioInputCost": 0.01, + "CachedAudioInputCost": 0.0003, + "TextOutputCost": 0.0024, + "AudioOutputCost": 0.02 + } } ] }, @@ -31,9 +36,14 @@ "Version": "20240620", "ApiKey": "", "Type": "realtime", - "MultiModal": true, - "PromptCost": 0.003, - "CompletionCost": 0.015 + "Cost": { + "TextInputCost": 0.0006, + "CachedTextInputCost": 0.0003, + "AudioInputCost": 0.01, + "CachedAudioInputCost": 0.0003, + "TextOutputCost": 0.0024, + "AudioOutputCost": 0.02 + } } ] } diff --git a/tests/UnitTest/MainTest.cs b/tests/UnitTest/MainTest.cs index 307855e48..a02276bde 100644 --- a/tests/UnitTest/MainTest.cs +++ b/tests/UnitTest/MainTest.cs @@ -1,5 +1,6 @@ using Microsoft.Extensions.DependencyInjection; using BotSharp.Abstraction.Conversations; +using BotSharp.Abstraction.Hooks; namespace UnitTest { @@ -14,18 +15,16 @@ public void TestConversationHookProvider() services.AddSingleton(); services.AddSingleton(); services.AddSingleton(); - - services.AddSingleton(); var serviceProvider = services.BuildServiceProvider(); - var conversationHookProvider = serviceProvider.GetService(); + var hooks = serviceProvider.GetHooksOrderByPriority(string.Empty); - Assert.AreEqual(3, conversationHookProvider.Hooks.Count()); + Assert.AreEqual(3, hooks.Count()); var prevHook = default(IConversationHook); // Assert priority - foreach (var hook in conversationHookProvider.HooksOrderByPriority) + foreach (var hook in hooks) { if (prevHook != null) {