From acf0e9fee9ad24c33b6e77aabe9c5ee5a64e8849 Mon Sep 17 00:00:00 2001 From: Jicheng Lu <103353@smsassist.com> Date: Tue, 24 Sep 2024 15:34:47 -0500 Subject: [PATCH] add collection exist --- .../Knowledges/IKnowledgeService.cs | 1 + .../Repositories/IBotSharpRepository.cs | 1 - .../VectorStorage/IVectorDb.cs | 3 +- .../FileRepository.KnowledgeBase.cs | 2 +- .../Controllers/KnowledgeBaseController.cs | 7 ++- .../MemVecDb/MemoryVectorDb.cs | 6 +++ .../Services/KnowledgeService.Document.cs | 22 ++++++--- .../Services/KnowledgeService.Vector.cs | 17 +++++++ .../BotSharp.Plugin.Qdrant/QdrantDb.cs | 49 ++++++++++--------- .../SemanticKernelMemoryStoreProvider.cs | 6 +++ 10 files changed, 81 insertions(+), 33 deletions(-) diff --git a/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs b/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs index 291af2718..0b1c06b53 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs @@ -6,6 +6,7 @@ namespace BotSharp.Abstraction.Knowledges; public interface IKnowledgeService { #region Vector + Task ExistVectorCollection(string collectionName); Task CreateVectorCollection(string collectionName, string collectionType, int dimension, string provider, string model); Task DeleteVectorCollection(string collectionName); Task> GetVectorCollections(string type); diff --git a/src/Infrastructure/BotSharp.Abstraction/Repositories/IBotSharpRepository.cs b/src/Infrastructure/BotSharp.Abstraction/Repositories/IBotSharpRepository.cs index bfd757c19..ced839d04 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Repositories/IBotSharpRepository.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Repositories/IBotSharpRepository.cs @@ -116,7 +116,6 @@ public interface IBotSharpRepository bool AddKnowledgeCollectionConfigs(List configs, bool reset = false); bool DeleteKnowledgeCollectionConfig(string collectionName); IEnumerable GetKnowledgeCollectionConfigs(VectorCollectionConfigFilter filter); - bool SaveKnolwedgeBaseFileMeta(KnowledgeDocMetaData metaData); /// /// Delete file meta data in a knowledge collection, given the vector store provider. If "fileId" is null, delete all in the collection. diff --git a/src/Infrastructure/BotSharp.Abstraction/VectorStorage/IVectorDb.cs b/src/Infrastructure/BotSharp.Abstraction/VectorStorage/IVectorDb.cs index 42471d42d..ae0828de1 100644 --- a/src/Infrastructure/BotSharp.Abstraction/VectorStorage/IVectorDb.cs +++ b/src/Infrastructure/BotSharp.Abstraction/VectorStorage/IVectorDb.cs @@ -5,7 +5,8 @@ namespace BotSharp.Abstraction.VectorStorage; public interface IVectorDb { string Provider { get; } - + + Task DoesCollectionExist(string collectionName); Task> GetCollections(); Task> GetPagedCollectionData(string collectionName, VectorFilter filter); Task> GetCollectionData(string collectionName, IEnumerable ids, bool withPayload = false, bool withVector = false); diff --git a/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.KnowledgeBase.cs b/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.KnowledgeBase.cs index 539276295..b0a950276 100644 --- a/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.KnowledgeBase.cs +++ b/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.KnowledgeBase.cs @@ -213,7 +213,7 @@ public PagedItems GetKnowledgeBaseFileMeta(string collecti return new PagedItems { - Items = records.Skip(filter.Offset).Take(filter.Size), + Items = records.OrderByDescending(x => x.CreateDate).Skip(filter.Offset).Take(filter.Size), Count = records.Count }; } diff --git a/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs b/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs index af01a6859..3ead41b04 100644 --- a/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs +++ b/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs @@ -1,6 +1,5 @@ using BotSharp.Abstraction.Files.Utilities; using BotSharp.Abstraction.Graph.Models; -using BotSharp.Abstraction.Knowledges.Models; using BotSharp.Abstraction.VectorStorage.Models; using BotSharp.OpenAPI.ViewModels.Knowledges; @@ -20,6 +19,12 @@ public KnowledgeBaseController(IKnowledgeService knowledgeService, IServiceProvi } #region Vector + [HttpGet("knowledge/vector/{collection}/exist")] + public async Task ExistVectorCollection([FromRoute] string collection) + { + return await _knowledgeService.ExistVectorCollection(collection); + } + [HttpGet("knowledge/vector/collections")] public async Task> GetVectorCollections([FromQuery] string type) { diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/MemVecDb/MemoryVectorDb.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/MemVecDb/MemoryVectorDb.cs index 0ba4f3e98..41331c2b1 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/MemVecDb/MemoryVectorDb.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/MemVecDb/MemoryVectorDb.cs @@ -10,6 +10,12 @@ public class MemoryVectorDb : IVectorDb public string Provider => "MemoryVector"; + + public async Task DoesCollectionExist(string collectionName) + { + return false; + } + public async Task CreateCollection(string collectionName, int dimension) { _collections[collectionName] = dimension; diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs index bb8046733..bd851eb86 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs @@ -3,7 +3,6 @@ using BotSharp.Abstraction.Files.Utilities; using BotSharp.Abstraction.Knowledges.Helpers; using BotSharp.Abstraction.VectorStorage.Enums; -using System.Collections; using System.Net.Http; using System.Net.Mime; @@ -13,13 +12,21 @@ public partial class KnowledgeService { public async Task UploadDocumentsToKnowledge(string collectionName, IEnumerable files) { + var res = new UploadKnowledgeResponse + { + Success = [], + Failed = files?.Select(x => x.FileName) ?? new List() + }; + if (string.IsNullOrWhiteSpace(collectionName) || files.IsNullOrEmpty()) { - return new UploadKnowledgeResponse - { - Success = [], - Failed = files?.Select(x => x.FileName) ?? new List() - }; + return res; + } + + var exist = await ExistVectorCollection(collectionName); + if (!exist) + { + return res; } var db = _services.GetRequiredService(); @@ -103,6 +110,9 @@ public async Task ImportDocumentContentToKnowledge(string collectionName, try { + var exist = await ExistVectorCollection(collectionName); + if (!exist) return false; + var db = _services.GetRequiredService(); var userId = await GetUserId(); var vectorStoreProvider = _settings.VectorDb.Provider; diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Vector.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Vector.cs index 0c2d6e65a..ee46b13e2 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Vector.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Vector.cs @@ -7,6 +7,23 @@ namespace BotSharp.Plugin.KnowledgeBase.Services; public partial class KnowledgeService { #region Collection + public async Task ExistVectorCollection(string collectionName) + { + var db = _services.GetRequiredService(); + var vectorDb = GetVectorDb(); + + var exist = await vectorDb.DoesCollectionExist(collectionName); + if (exist) return true; + + var configs = db.GetKnowledgeCollectionConfigs(new VectorCollectionConfigFilter + { + CollectionNames = [collectionName], + VectorStroageProviders = [_settings.VectorDb.Provider] + }); + + return !configs.IsNullOrEmpty(); + } + public async Task CreateVectorCollection(string collectionName, string collectionType, int dimension, string provider, string model) { try diff --git a/src/Plugins/BotSharp.Plugin.Qdrant/QdrantDb.cs b/src/Plugins/BotSharp.Plugin.Qdrant/QdrantDb.cs index 5664fa402..6c54f60b9 100644 --- a/src/Plugins/BotSharp.Plugin.Qdrant/QdrantDb.cs +++ b/src/Plugins/BotSharp.Plugin.Qdrant/QdrantDb.cs @@ -39,16 +39,22 @@ private QdrantClient GetClient() return _client; } - public async Task CreateCollection(string collectionName, int dimension) + public async Task DoesCollectionExist(string collectionName) { var client = GetClient(); - var exist = await DoesCollectionExist(client, collectionName); + return await client.CollectionExistsAsync(collectionName); + } + + public async Task CreateCollection(string collectionName, int dimension) + { + var exist = await DoesCollectionExist(collectionName); if (exist) return false; try { // Create a new collection + var client = GetClient(); await client.CreateCollectionAsync(collectionName, new VectorParams() { Size = (ulong)dimension, @@ -65,11 +71,11 @@ public async Task CreateCollection(string collectionName, int dimension) public async Task DeleteCollection(string collectionName) { - var client = GetClient(); - var exist = await DoesCollectionExist(client, collectionName); + var exist = await DoesCollectionExist(collectionName); if (!exist) return false; + var client = GetClient(); await client.DeleteCollectionAsync(collectionName); return true; } @@ -83,8 +89,7 @@ public async Task> GetCollections() public async Task> GetPagedCollectionData(string collectionName, VectorFilter filter) { - var client = GetClient(); - var exist = await DoesCollectionExist(client, collectionName); + var exist = await DoesCollectionExist(collectionName); if (!exist) { return new StringIdPagedItems(); @@ -126,6 +131,7 @@ public async Task> GetPagedCollectionDa }; } + var client = GetClient(); var totalPointCount = await client.CountAsync(collectionName, filter: queryFilter); var response = await client.ScrollAsync(collectionName, limit: (uint)filter.Size, offset: !string.IsNullOrWhiteSpace(filter.StartId) ? new PointId { Uuid = filter.StartId } : null, @@ -152,15 +158,18 @@ public async Task> GetPagedCollectionDa public async Task> GetCollectionData(string collectionName, IEnumerable ids, bool withPayload = false, bool withVector = false) { - if (ids.IsNullOrEmpty()) return Enumerable.Empty(); - - var client = GetClient(); - var exist = await DoesCollectionExist(client, collectionName); + if (ids.IsNullOrEmpty()) + { + return Enumerable.Empty(); + } + + var exist = await DoesCollectionExist(collectionName); if (!exist) { return Enumerable.Empty(); } + var client = GetClient(); var pointIds = ids.Select(x => new PointId { Uuid = x.ToString() }).Distinct().ToList(); var points = await client.RetrieveAsync(collectionName, pointIds, withPayload, withVector); return points.Select(x => new VectorCollectionData @@ -209,8 +218,7 @@ public async Task> Search(string collectionNam { var results = new List(); - var client = GetClient(); - var exist = await DoesCollectionExist(client, collectionName); + var exist = await DoesCollectionExist(collectionName); if (!exist) { return results; @@ -221,7 +229,8 @@ public async Task> Search(string collectionNam { payloadSelector.Include = new PayloadIncludeSelector { Fields = { fields.ToArray() } }; } - + + var client = GetClient(); var points = await client.SearchAsync(collectionName, vector, limit: (ulong)limit, @@ -244,33 +253,27 @@ public async Task DeleteCollectionData(string collectionName, List i { if (ids.IsNullOrEmpty()) return false; - var client = GetClient(); - var exist = await DoesCollectionExist(client, collectionName); + var exist = await DoesCollectionExist(collectionName); if (!exist) { return false; } + var client = GetClient(); var result = await client.DeleteAsync(collectionName, ids); return result.Status == UpdateStatus.Completed; } public async Task DeleteCollectionAllData(string collectionName) { - var client = GetClient(); - var exist = await DoesCollectionExist(client, collectionName); + var exist = await DoesCollectionExist(collectionName); if (!exist) { return false; } + var client = GetClient(); var result = await client.DeleteAsync(collectionName, new Filter()); return result.Status == UpdateStatus.Completed; } - - - private async Task DoesCollectionExist(QdrantClient client, string collectionName) - { - return await client.CollectionExistsAsync(collectionName); - } } diff --git a/src/Plugins/BotSharp.Plugin.SemanticKernel/SemanticKernelMemoryStoreProvider.cs b/src/Plugins/BotSharp.Plugin.SemanticKernel/SemanticKernelMemoryStoreProvider.cs index bf086db9d..5a5dd5a78 100644 --- a/src/Plugins/BotSharp.Plugin.SemanticKernel/SemanticKernelMemoryStoreProvider.cs +++ b/src/Plugins/BotSharp.Plugin.SemanticKernel/SemanticKernelMemoryStoreProvider.cs @@ -25,6 +25,12 @@ public SemanticKernelMemoryStoreProvider(IMemoryStore memoryStore) public string Provider => "SemanticKernel"; + + public async Task DoesCollectionExist(string collectionName) + { + return false; + } + public async Task CreateCollection(string collectionName, int dimension) { await _memoryStore.CreateCollectionAsync(collectionName);