Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
namespace BotSharp.Abstraction.Files.Converters;

/// <summary>
/// Contract for components that turn the pages of a PDF document into image files.
/// </summary>
public interface IPdf2ImageConverter
{
/// <summary>
/// Convert pdf pages to images, and return a list of image file paths
/// </summary>
/// <param name="pdfLocation">Pdf file location</param>
/// <param name="imageFolderLocation">Image folder location</param>
/// <returns>A task whose result is the collection of image file paths produced by the conversion</returns>
Task<IEnumerable<string>> ConvertPdfToImages(string pdfLocation, string imageFolderLocation);
}
8 changes: 8 additions & 0 deletions src/Infrastructure/BotSharp.Core/BotSharp.Core.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -181,11 +181,19 @@
<PackageReference Include="Fluid.Core" Version="2.8.0" />
<PackageReference Include="Microsoft.AspNetCore.StaticFiles" Version="2.2.0" />
<PackageReference Include="Nanoid" Version="3.0.0" />
<PackageReference Include="PdfiumViewer" Version="2.13.0" />
<PackageReference Include="PdfiumViewer.Native.x86.v8-xfa" Version="2018.4.8.256" />
<PackageReference Include="PdfiumViewer.Native.x86_64.v8-xfa" Version="2018.4.8.256" />
<PackageReference Include="RedLock.net" Version="2.3.2" />
<PackageReference Include="System.Drawing.Common" Version="8.0.6" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\BotSharp.Abstraction\BotSharp.Abstraction.csproj" />
</ItemGroup>

<ItemGroup>
<Folder Include="Translation\Models\" />
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
using BotSharp.Abstraction.Browsing;
using BotSharp.Abstraction.Browsing.Models;
using BotSharp.Abstraction.Files.Converters;
using BotSharp.Core.Files.Converters;
using Microsoft.EntityFrameworkCore;
using System.IO;
using System.Linq;
Expand Down Expand Up @@ -51,18 +51,8 @@ private async Task<List<MessageFileModel>> GetMessageFiles(string conversationId

try
{
var msgInfo = new MessageInfo
{
ContextId = Guid.NewGuid().ToString()
};
var web = _services.GetRequiredService<IWebBrowser>();
var preFixPath = Path.Combine(_baseDir, CONVERSATION_FOLDER, conversationId, FILE_FOLDER);

if (isNeedScreenShot)
{
await web.LaunchBrowser(msgInfo);
}

foreach (var messageId in messageIds)
{
var dir = Path.Combine(preFixPath, messageId, source);
Expand Down Expand Up @@ -91,40 +81,40 @@ private async Task<List<MessageFileModel>> GetMessageFiles(string conversationId
var screenShotDir = Path.Combine(subDir, SCREENSHOT_FILE_FOLDER);
if (ExistDirectory(screenShotDir) && Directory.GetFiles(screenShotDir).Any())
{
file = Directory.GetFiles(screenShotDir).First();
contentType = GetFileContentType(file);

var model = new MessageFileModel()
foreach (var screenShot in Directory.GetFiles(screenShotDir))
{
MessageId = messageId,
FileStorageUrl = file,
ContentType = contentType
};
files.Add(model);
contentType = GetFileContentType(screenShot);
if (!_allowedImageTypes.Contains(contentType)) continue;

var model = new MessageFileModel()
{
MessageId = messageId,
FileStorageUrl = screenShot,
ContentType = contentType
};
files.Add(model);
}
}
else
{
await web.GoToPage(msgInfo, new PageActionArgs { Url = file });
var path = Path.Combine(subDir, SCREENSHOT_FILE_FOLDER, $"{Guid.NewGuid()}.png");
await web.ScreenshotAsync(msgInfo, path);
contentType = GetFileContentType(path);
var screenShotPath = Path.Combine(subDir, SCREENSHOT_FILE_FOLDER);
var images = await ConvertPdfToImages(file, screenShotPath);

var model = new MessageFileModel()
foreach (var image in images)
{
MessageId = messageId,
FileStorageUrl = path,
ContentType = contentType
};
files.Add(model);
contentType = GetFileContentType(image);
var model = new MessageFileModel()
{
MessageId = messageId,
FileStorageUrl = image,
ContentType = contentType
};
files.Add(model);
}
}
}
}
}

if (isNeedScreenShot)
{
await web.CloseBrowser(msgInfo.ContextId);
}
}
catch (Exception ex)
{
Expand Down Expand Up @@ -227,9 +217,13 @@ public bool SaveMessageFiles(string conversationId, string messageId, string sou
Directory.CreateDirectory(subDir);
}

using var fs = new FileStream(Path.Combine(subDir, file.FileName), FileMode.Create);
fs.Write(bytes, 0, bytes.Length);
fs.Flush(true);
using (var fs = new FileStream(Path.Combine(subDir, file.FileName), FileMode.Create))
{
fs.Write(bytes, 0, bytes.Length);
fs.Flush(true);
fs.Close();
Thread.Sleep(100);
}
}

return true;
Expand Down Expand Up @@ -318,5 +312,20 @@ private string GetConversationFileDirectory(string? conversationId, string? mess
var dir = Path.Combine(_baseDir, CONVERSATION_FOLDER, conversationId);
return dir;
}

/// <summary>
/// Resolves the registered <see cref="IPdf2ImageConverter"/> implementations and delegates
/// the conversion to one of them. A converter whose type name differs from
/// <see cref="PdfiumConverter"/> is preferred; <see cref="PdfiumConverter"/> is the fallback.
/// </summary>
/// <param name="pdfLoc">Pdf file location, forwarded to the chosen converter</param>
/// <param name="imageLoc">Image folder location, forwarded to the chosen converter</param>
/// <returns>
/// The image paths returned by the chosen converter, or an empty sequence when no converter is available
/// </returns>
private async Task<IEnumerable<string>> ConvertPdfToImages(string pdfLoc, string imageLoc)
{
    var registered = _services.GetServices<IPdf2ImageConverter>();
    if (registered.IsNullOrEmpty())
    {
        return Enumerable.Empty<string>();
    }

    // Selection is by simple type name, exactly as the registration fallback expects.
    var builtInName = typeof(PdfiumConverter).Name;
    var chosen = registered.FirstOrDefault(c => c.GetType().Name != builtInName)
                 ?? registered.FirstOrDefault(c => c.GetType().Name == builtInName);

    if (chosen == null)
    {
        return Enumerable.Empty<string>();
    }

    return await chosen.ConvertPdfToImages(pdfLoc, imageLoc);
}
#endregion
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
using BotSharp.Abstraction.Files.Converters;
using PdfiumViewer;
using System.IO;

namespace BotSharp.Core.Files.Converters;

/// <summary>
/// <see cref="IPdf2ImageConverter"/> implementation backed by PdfiumViewer.
/// Each page of the PDF is rendered and saved as a PNG file.
/// </summary>
public class PdfiumConverter : IPdf2ImageConverter
{
    /// <summary>
    /// Convert pdf pages to PNG images, and return the list of image file paths.
    /// </summary>
    /// <param name="pdfLocation">Pdf file location</param>
    /// <param name="imageFolderLocation">
    /// Image folder location. Any existing folder at this path is deleted (recursively)
    /// and recreated before the pages are written.
    /// </param>
    /// <returns>
    /// The paths of the generated images, one per page, in page order. Empty when
    /// <paramref name="imageFolderLocation"/> is null, empty or whitespace.
    /// </returns>
    public async Task<IEnumerable<string>> ConvertPdfToImages(string pdfLocation, string imageFolderLocation)
    {
        var paths = new List<string>();
        if (string.IsNullOrWhiteSpace(imageFolderLocation)) return paths;

        // Open the PDF BEFORE touching the output folder: if the document cannot be
        // loaded, the exception propagates while previously generated images stay intact.
        using var document = PdfDocument.Load(pdfLocation);

        // Start from a clean folder so the result only contains images from this run.
        if (Directory.Exists(imageFolderLocation))
        {
            Directory.Delete(imageFolderLocation, true);
        }
        Directory.CreateDirectory(imageFolderLocation);

        // A single guid prefixes every page image produced by this call.
        var guid = Guid.NewGuid().ToString();
        var pages = document.PageCount;

        for (var page = 0; page < pages; page++)
        {
            var size = document.PageSizes[page];

            // Page dimensions are truncated to whole numbers and used as the bitmap size;
            // 96/96 and true are passed as dpiX/dpiY and the for-printing flag.
            using var image = document.Render(page, (int)size.Width, (int)size.Height, 96, 96, true);

            // File names use 1-based page numbers.
            var imagePath = Path.Combine(imageFolderLocation, $"{guid}_pg_{page + 1}.png");
            image.Save(imagePath, System.Drawing.Imaging.ImageFormat.Png);
            paths.Add(imagePath);
        }

        // No genuinely asynchronous work happens here; the await keeps the async signature.
        return await Task.FromResult(paths);
    }
}
3 changes: 3 additions & 0 deletions src/Infrastructure/BotSharp.Core/Files/FilePlugin.cs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using BotSharp.Abstraction.Files.Converters;
using BotSharp.Core.Files.Converters;
using BotSharp.Core.Files.Hooks;
using Microsoft.Extensions.Configuration;

Expand All @@ -18,5 +20,6 @@ public void RegisterDI(IServiceCollection services, IConfiguration config)

services.AddScoped<IAgentHook, FileAnalyzerHook>();
services.AddScoped<IAgentToolHook, FileAnalyzerToolHook>();
services.AddScoped<IPdf2ImageConverter, PdfiumConverter>();
}
}