Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
using System;
using System.Collections.Generic;
using System.Text;

namespace BotSharp.Abstraction.Knowledges
{
/// <summary>
/// Abstraction for a converter that turns an image into text.
/// The name suggests a PaddleOCR-backed implementation; the interface itself
/// does not prescribe the OCR engine.
/// </summary>
public interface IPaddleOcrConverter
{
// void LoadModel();
/// <summary>
/// Asynchronously converts the image identified by <paramref name="loadPath"/> to text.
/// </summary>
/// <param name="loadPath">
/// Location the image is loaded from (presumably a file-system path; confirm against implementations).
/// </param>
/// <returns>A task whose result is the text produced for the image.</returns>
Task<string> ConvertImageToText(string loadPath);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
using System;
using System.Collections.Generic;
using System.Text;
using Microsoft.AspNetCore.Http;

namespace BotSharp.Abstraction.Knowledges
{
/// <summary>
/// Abstraction for a converter that extracts text from an uploaded PDF file.
/// </summary>
public interface IPdf2TextConverter
{
/// <summary>
/// Asynchronously converts the uploaded PDF to text.
/// </summary>
/// <param name="formFile">The uploaded file to convert.</param>
/// <param name="startPageNum">
/// Optional first page number to convert; <c>null</c> presumably means "from the first page"
/// (exact bounds handling is up to the implementation).
/// </param>
/// <param name="endPageNum">
/// Optional last page number to convert; <c>null</c> presumably means "through the last page"
/// (exact bounds handling is up to the implementation).
/// </param>
/// <returns>A task whose result is the extracted text.</returns>
Task<string> ConvertPdfToText(IFormFile formFile, int? startPageNum, int? endPageNum);
}
}
1 change: 1 addition & 0 deletions src/Infrastructure/BotSharp.Core/BotSharp.Core.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
<PackageReference Include="EntityFrameworkCore.BootKit" Version="6.2.1" />
<PackageReference Include="Fluid.Core" Version="2.4.0" />
<PackageReference Include="TensorFlow.Keras" Version="0.11.2" />
<PackageReference Include="PdfPig" Version="0.1.9-alpha-20230806-4a480" />
</ItemGroup>

<ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using BotSharp.Core.Functions;
using BotSharp.Core.Hooks;
using BotSharp.Core.Templating;
using BotSharp.Core.Plugins.Knowledges.Services;
using Microsoft.AspNetCore.Builder;
using Microsoft.Extensions.Configuration;

Expand Down Expand Up @@ -95,5 +96,7 @@ public static void RegisterPlugins(IServiceCollection services, IConfiguration c
loader.Load();

services.AddSingleton(loader);

services.AddSingleton<IPdf2TextConverter, PigPdf2TextConverter>();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ public class KnowledgeBaseSettings
public string VectorDb { get; set; }
public string TextEmbedding { get; set; }
public string TextCompletion { get; set; }
public string Pdf2TextConverter { get; set; }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using Microsoft.AspNetCore.Http;
using UglyToad.PdfPig;
using UglyToad.PdfPig.Content;

namespace BotSharp.Core.Plugins.Knowledges.Services;

/// <summary>
/// <see cref="IPdf2TextConverter"/> implementation that extracts page text from an
/// uploaded PDF using the PdfPig library.
/// </summary>
public class PigPdf2TextConverter : IPdf2TextConverter
{
    /// <summary>
    /// Extracts the text of the uploaded PDF, optionally restricted to a page range.
    /// </summary>
    /// <param name="formFile">The uploaded PDF file.</param>
    /// <param name="startPageNum">
    /// Optional inclusive lower bound on the page number; pages numbered below it are skipped.
    /// <c>null</c> means no lower bound.
    /// </param>
    /// <param name="endPageNum">
    /// Optional inclusive upper bound on the page number; pages numbered above it are skipped.
    /// <c>null</c> means no upper bound.
    /// </param>
    /// <returns>
    /// The concatenated text of the selected pages, or an empty string when the upload is empty.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="formFile"/> is <c>null</c>.</exception>
    public async Task<string> ConvertPdfToText(IFormFile formFile, int? startPageNum, int? endPageNum)
    {
        if (formFile is null)
        {
            throw new ArgumentNullException(nameof(formFile));
        }

        return await OpenPdfDocumentAsync(formFile, startPageNum, endPageNum);
    }

    /// <summary>
    /// Copies the upload to a temporary file, reads the requested pages with PdfPig and
    /// always removes the temporary file afterwards.
    /// </summary>
    private async Task<string> OpenPdfDocumentAsync(IFormFile formFile, int? startPageNum, int? endPageNum)
    {
        if (formFile.Length <= 0)
        {
            return string.Empty;
        }

        // Path.GetTempFileName creates the file on disk; we own it and must delete it.
        var filePath = Path.GetTempFileName();

        try
        {
            using (var stream = System.IO.File.Create(filePath))
            {
                await formFile.CopyToAsync(stream);
            }

            var content = new StringBuilder();

            // Dispose the document before the temp file is deleted in the finally block.
            using (var document = PdfDocument.Open(filePath))
            {
                foreach (Page page in document.GetPages())
                {
                    var beforeStart = startPageNum.HasValue && page.Number < startPageNum.Value;
                    var afterEnd = endPageNum.HasValue && page.Number > endPageNum.Value;
                    if (beforeStart || afterEnd)
                    {
                        continue;
                    }

                    content.Append(page.Text);
                }
            }

            return content.ToString();
        }
        finally
        {
            // Clean up even when copying or parsing fails, so uploads do not accumulate in the temp directory.
            System.IO.File.Delete(filePath);
        }
    }
}
4 changes: 2 additions & 2 deletions src/Infrastructure/BotSharp.OpenAPI/BotSharp.OpenAPI.csproj
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<Project Sdk="Microsoft.NET.Sdk">
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
Expand All @@ -9,11 +9,11 @@

<ItemGroup>
<PackageReference Include="Microsoft.AspNetCore.Mvc.Core" Version="2.2.5" />
<PackageReference Include="PdfPig" Version="0.1.8" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\BotSharp.Abstraction\BotSharp.Abstraction.csproj" />
<ProjectReference Include="..\BotSharp.Core\BotSharp.Core.csproj" />
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
using Microsoft.AspNetCore.Http;
using UglyToad.PdfPig.Content;
using UglyToad.PdfPig;
using BotSharp.Core.Plugins.Knowledges;


namespace BotSharp.OpenAPI.Controllers;

Expand All @@ -11,11 +13,13 @@ namespace BotSharp.OpenAPI.Controllers;
public class KnowledgeController : ControllerBase, IApiAdapter
{
private readonly IKnowledgeService _knowledgeService;
public KnowledgeController(IKnowledgeService knowledgeService)
private readonly IServiceProvider _services;

public KnowledgeController(IKnowledgeService knowledgeService, IServiceProvider services)
{
_knowledgeService = knowledgeService;
_services = services;
}

[HttpGet("/knowledge/{agentId}")]
public async Task<List<RetrievedResult>> RetrieveKnowledge([FromRoute] string agentId, [FromQuery(Name = "q")] string question)
{
Expand All @@ -27,44 +31,22 @@ public async Task<List<RetrievedResult>> RetrieveKnowledge([FromRoute] string ag
}

[HttpPost("/knowledge/{agentId}")]
public async Task<IActionResult> FeedKnowledge([FromRoute] string agentId, List<IFormFile> files, [FromQuery] int? startPageNum, [FromQuery] int? endPageNum)
public async Task<IActionResult> FeedKnowledge([FromRoute] string agentId, List<IFormFile> files, [FromQuery] int? startPageNum, [FromQuery] int? endPageNum, [FromQuery] bool? paddleModel)
{
var setttings = _services.GetRequiredService<KnowledgeBaseSettings>();
var textConverter = _services.GetServices<IPdf2TextConverter>().First(x => x.GetType().FullName.EndsWith(setttings.Pdf2TextConverter));
long size = files.Sum(f => f.Length);

foreach (var formFile in files)
{
if (formFile.Length <= 0)
{
continue;
}

var filePath = Path.GetTempFileName();

using (var stream = System.IO.File.Create(filePath))
{
await formFile.CopyToAsync(stream);
}

var document = PdfDocument.Open(filePath);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can keep PdfPig as one of the implementations of IPdf2TextConverter rather than deleting it.

var content = "";
foreach (Page page in document.GetPages())
{
if (startPageNum.HasValue && page.Number < startPageNum.Value)
{
continue;
}

if (endPageNum.HasValue && page.Number > endPageNum.Value)
{
continue;
}

content += page.Text;
}
content = await textConverter.ConvertPdfToText(formFile, startPageNum, endPageNum);

// Process uploaded files
// Don't rely on or trust the FileName property without validation.

// Add FeedWithMetaData
await _knowledgeService.Feed(new KnowledgeFeedModel
{
AgentId = agentId,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,15 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Docnet.Core" Version="2.5.0-alpha.1" />
<PackageReference Include="Magick.NET-Q16-AnyCPU" Version="13.2.0" />
<PackageReference Include="Magick.NET.Core" Version="13.2.0" />
<PackageReference Include="OpenCvSharp4.runtime.win" Version="4.7.0.20230115" />
<PackageReference Include="Sdcb.PaddleInference" Version="2.4.1.3" />
<PackageReference Include="Sdcb.PaddleInference.runtime.win64.mkl" Version="2.5.1" />
<PackageReference Include="Sdcb.PaddleOCR" Version="2.6.0.5" />
<PackageReference Include="Sdcb.PaddleOCR.Models.LocalV3" Version="2.6.0.5" />
<PackageReference Include="System.Drawing.Common" Version="8.0.0-preview.7.23375.5" />
</ItemGroup>

<ItemGroup>
Expand Down
8 changes: 7 additions & 1 deletion src/Plugins/BotSharp.Plugin.PaddleSharp/PaddleSharpPlugin.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
using BotSharp.Abstraction.Knowledges;
using BotSharp.Abstraction.Plugins;
using BotSharp.Plugin.PaddleSharp.Providers;
using BotSharp.Plugin.PaddleSharp.Settings;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using System;
Expand All @@ -9,6 +12,9 @@ public class PaddleSharpPlugin : IBotSharpPlugin
{
/// <summary>
/// Registers the plugin's services: a <see cref="PaddleSharpSettings"/> instance bound from the
/// "PaddleSharp" configuration section, and <see cref="Pdf2TextConverter"/> as an
/// <see cref="IPdf2TextConverter"/> singleton.
/// </summary>
/// <param name="services">The service collection to add registrations to.</param>
/// <param name="config">The configuration the settings are bound from.</param>
public void RegisterDI(IServiceCollection services, IConfiguration config)
{
    // Populate a fresh settings object from the "PaddleSharp" section.
    var paddleSettings = new PaddleSharpSettings();
    config.GetSection("PaddleSharp").Bind(paddleSettings);

    // Same registrations, same order: settings first, then the converter.
    services
        .AddSingleton(_ => paddleSettings)
        .AddSingleton<IPdf2TextConverter, Pdf2TextConverter>();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
using System;
using System.Collections.Generic;
using System.Text;
using Sdcb.PaddleOCR;
using Sdcb.PaddleOCR.Models;
using Sdcb.PaddleInference;
using Sdcb.PaddleOCR.Models.LocalV3;
using OpenCvSharp;
using System.Threading.Tasks;
using BotSharp.Abstraction.Knowledges;
using BotSharp.Plugin.PaddleSharp.Settings;

namespace BotSharp.Plugin.PaddleSharp.Providers;

public class PaddleOcrConverter : IPaddleOcrConverter
{
private FullOcrModel _paddleFullOcrmodel;
private QueuedPaddleOcrAll _allModel;
private readonly PaddleSharpSettings _paddleSharpSettings;

public PaddleOcrConverter(FullOcrModel paddleFullOcrmodel, QueuedPaddleOcrAll allModel, PaddleSharpSettings paddleSharpSettings)
{
_paddleFullOcrmodel = paddleFullOcrmodel;
_allModel = allModel;
_paddleSharpSettings = paddleSharpSettings;
}

private void LoadModel()
{
_allModel = new(() => new PaddleOcrAll(_paddleFullOcrmodel, _paddleSharpSettings.device)
{
AllowRotateDetection = _paddleSharpSettings.allowRotateDetection,
Enable180Classification = _paddleSharpSettings.enable180Classification,
}, consumerCount: _paddleSharpSettings.consumerCount, boundedCapacity: _paddleSharpSettings.boundedCapacity);
}

private void DisposeModel()
{
_allModel.Dispose();
}

public async Task<string> ConvertImageToText(string loadPath)
{
_allModel = new(() => new PaddleOcrAll(_paddleFullOcrmodel, _paddleSharpSettings.device)
{
AllowRotateDetection = _paddleSharpSettings.allowRotateDetection,
Enable180Classification = _paddleSharpSettings.enable180Classification,
}, consumerCount: _paddleSharpSettings.consumerCount, boundedCapacity: _paddleSharpSettings.boundedCapacity);

var contents = "";
using (Mat src = Cv2.ImRead(loadPath))
{
PaddleOcrResult result = await _allModel.Run(src);

foreach (PaddleOcrResultRegion region in result.Regions)
{
if (region.Score > _paddleSharpSettings.acceptScore)
{
contents += region.Text + " ";
}
}
}

_allModel.Dispose();
return contents;
}
}
*/
Loading