Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 29 additions & 7 deletions service/Abstractions/Constants.cs
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,37 @@ public static class Constants
public const string HttpIndexPlaceholder = "{index}";
public const string HttpDocumentIdPlaceholder = "{documentId}";

// Handlers
public const string DeleteDocumentPipelineStepName = "private_delete_document";
public const string DeleteIndexPipelineStepName = "private_delete_index";
// Pipeline Handlers, Step names
public const string PipelineStepsExtract = "extract";
public const string PipelineStepsPartition = "partition";
public const string PipelineStepsGenEmbeddings = "gen_embeddings";
public const string PipelineStepsSaveRecords = "save_records";
public const string PipelineStepsSummarize = "summarize";
public const string PipelineStepsDeleteGeneratedFiles = "delete_generated_files";
public const string PipelineStepsDeleteDocument = "private_delete_document";
public const string PipelineStepsDeleteIndex = "private_delete_index";

// Pipeline steps
public static readonly string[] DefaultPipeline = { "extract", "partition", "gen_embeddings", "save_records" };
public static readonly string[] PipelineWithoutSummary = { "extract", "partition", "gen_embeddings", "save_records" };
public static readonly string[] PipelineWithSummary = { "extract", "partition", "gen_embeddings", "save_records", "summarize", "gen_embeddings", "save_records" };
public static readonly string[] PipelineOnlySummary = { "extract", "summarize", "gen_embeddings", "save_records" };
public static readonly string[] DefaultPipeline =
{
PipelineStepsExtract, PipelineStepsPartition, PipelineStepsGenEmbeddings, PipelineStepsSaveRecords
};

public static readonly string[] PipelineWithoutSummary =
{
PipelineStepsExtract, PipelineStepsPartition, PipelineStepsGenEmbeddings, PipelineStepsSaveRecords
};

public static readonly string[] PipelineWithSummary =
{
PipelineStepsExtract, PipelineStepsPartition, PipelineStepsGenEmbeddings, PipelineStepsSaveRecords,
PipelineStepsSummarize, PipelineStepsGenEmbeddings, PipelineStepsSaveRecords
};

public static readonly string[] PipelineOnlySummary =
{
PipelineStepsExtract, PipelineStepsSummarize, PipelineStepsGenEmbeddings, PipelineStepsSaveRecords
};

// Standard prompt names
public const string PromptNamesSummarize = "summarize";
Expand Down
4 changes: 2 additions & 2 deletions service/Abstractions/Pipeline/DataPipeline.cs
Original file line number Diff line number Diff line change
Expand Up @@ -368,12 +368,12 @@ public string RollbackToPreviousStep()

public bool IsDocumentDeletionPipeline()
{
return this.Steps.Count == 1 && this.Steps.First() == Constants.DeleteDocumentPipelineStepName;
return this.Steps.Count == 1 && this.Steps.First() == Constants.PipelineStepsDeleteDocument;
}

public bool IsIndexDeletionPipeline()
{
return this.Steps.Count == 1 && this.Steps.First() == Constants.DeleteIndexPipelineStepName;
return this.Steps.Count == 1 && this.Steps.First() == Constants.PipelineStepsDeleteIndex;
}

public void Validate()
Expand Down
45 changes: 45 additions & 0 deletions service/Core/Handlers/DeleteGeneratedFilesHandler.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Copyright (c) Microsoft. All rights reserved.

using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.KernelMemory.ContentStorage;
using Microsoft.KernelMemory.Diagnostics;
using Microsoft.KernelMemory.Pipeline;

namespace Microsoft.KernelMemory.Handlers;

public class DeleteGeneratedFilesHandler : IPipelineStepHandler
{
private readonly IContentStorage _contentStorage;
private readonly ILogger<DeleteGeneratedFilesHandler> _log;

public string StepName { get; }

public DeleteGeneratedFilesHandler(
string stepName,
IContentStorage contentStorage,
ILogger<DeleteGeneratedFilesHandler>? log = null)
{
this.StepName = stepName;
this._contentStorage = contentStorage;
this._log = log ?? DefaultLogger<DeleteGeneratedFilesHandler>.Instance;

this._log.LogInformation("Handler '{0}' ready", stepName);
}

/// <inheritdoc />
public async Task<(bool success, DataPipeline updatedPipeline)> InvokeAsync(
DataPipeline pipeline, CancellationToken cancellationToken = default)
{
this._log.LogDebug("Deleting generated files, pipeline '{0}/{1}'", pipeline.Index, pipeline.DocumentId);

// Delete files, leaving the status file
await this._contentStorage.EmptyDocumentDirectoryAsync(
index: pipeline.Index,
documentId: pipeline.DocumentId,
cancellationToken).ConfigureAwait(false);

return (true, pipeline);
}
}
23 changes: 14 additions & 9 deletions service/Core/KernelMemoryBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -510,10 +510,13 @@ private MemoryServerless BuildServerlessClient()
=> ActivatorUtilities.CreateInstance<SaveRecordsHandler>(serviceProvider, "save_records"));

this._memoryServiceCollection.AddTransient<DeleteDocumentHandler>(serviceProvider
=> ActivatorUtilities.CreateInstance<DeleteDocumentHandler>(serviceProvider, Constants.DeleteDocumentPipelineStepName));
=> ActivatorUtilities.CreateInstance<DeleteDocumentHandler>(serviceProvider, Constants.PipelineStepsDeleteDocument));

this._memoryServiceCollection.AddTransient<DeleteIndexHandler>(serviceProvider
=> ActivatorUtilities.CreateInstance<DeleteIndexHandler>(serviceProvider, Constants.DeleteIndexPipelineStepName));
=> ActivatorUtilities.CreateInstance<DeleteIndexHandler>(serviceProvider, Constants.PipelineStepsDeleteIndex));

this._memoryServiceCollection.AddTransient<DeleteGeneratedFilesHandler>(serviceProvider
=> ActivatorUtilities.CreateInstance<DeleteGeneratedFilesHandler>(serviceProvider, Constants.PipelineStepsDeleteGeneratedFiles));
}

var serviceProvider = this._memoryServiceCollection.BuildServiceProvider();
Expand Down Expand Up @@ -544,6 +547,7 @@ private MemoryServerless BuildServerlessClient()
memoryClientInstance.AddHandler(serviceProvider.GetService<SaveRecordsHandler>() ?? throw new ConfigurationException("Unable to build " + nameof(SaveRecordsHandler)));
memoryClientInstance.AddHandler(serviceProvider.GetService<DeleteDocumentHandler>() ?? throw new ConfigurationException("Unable to build " + nameof(DeleteDocumentHandler)));
memoryClientInstance.AddHandler(serviceProvider.GetService<DeleteIndexHandler>() ?? throw new ConfigurationException("Unable to build " + nameof(DeleteIndexHandler)));
memoryClientInstance.AddHandler(serviceProvider.GetService<DeleteGeneratedFilesHandler>() ?? throw new ConfigurationException("Unable to build " + nameof(DeleteGeneratedFilesHandler)));
}

return memoryClientInstance;
Expand Down Expand Up @@ -583,13 +587,14 @@ private MemoryService BuildAsyncClient()

// Handlers - Register these handlers to run as hosted services in the caller app.
// At start each hosted handler calls IPipelineOrchestrator.AddHandlerAsync() to register in the orchestrator.
this._hostServiceCollection.AddHandlerAsHostedService<TextExtractionHandler>("extract");
this._hostServiceCollection.AddHandlerAsHostedService<SummarizationHandler>("summarize");
this._hostServiceCollection.AddHandlerAsHostedService<TextPartitioningHandler>("partition");
this._hostServiceCollection.AddHandlerAsHostedService<GenerateEmbeddingsHandler>("gen_embeddings");
this._hostServiceCollection.AddHandlerAsHostedService<SaveRecordsHandler>("save_records");
this._hostServiceCollection.AddHandlerAsHostedService<DeleteDocumentHandler>(Constants.DeleteDocumentPipelineStepName);
this._hostServiceCollection.AddHandlerAsHostedService<DeleteIndexHandler>(Constants.DeleteIndexPipelineStepName);
this._hostServiceCollection.AddHandlerAsHostedService<TextExtractionHandler>(Constants.PipelineStepsExtract);
this._hostServiceCollection.AddHandlerAsHostedService<TextPartitioningHandler>(Constants.PipelineStepsPartition);
this._hostServiceCollection.AddHandlerAsHostedService<GenerateEmbeddingsHandler>(Constants.PipelineStepsGenEmbeddings);
this._hostServiceCollection.AddHandlerAsHostedService<SaveRecordsHandler>(Constants.PipelineStepsSaveRecords);
this._hostServiceCollection.AddHandlerAsHostedService<SummarizationHandler>(Constants.PipelineStepsSummarize);
this._hostServiceCollection.AddHandlerAsHostedService<DeleteDocumentHandler>(Constants.PipelineStepsDeleteDocument);
this._hostServiceCollection.AddHandlerAsHostedService<DeleteIndexHandler>(Constants.PipelineStepsDeleteIndex);
this._hostServiceCollection.AddHandlerAsHostedService<DeleteGeneratedFilesHandler>(Constants.PipelineStepsDeleteGeneratedFiles);
}

this.CheckForMissingDependencies();
Expand Down
4 changes: 2 additions & 2 deletions service/Core/Pipeline/BaseOrchestrator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ protected static DataPipeline PrepareIndexDeletion(string? index)
DocumentId = string.Empty,
};

return pipeline.Then(Constants.DeleteIndexPipelineStepName).Build();
return pipeline.Then(Constants.PipelineStepsDeleteIndex).Build();
}

protected static DataPipeline PrepareDocumentDeletion(string? index, string documentId)
Expand All @@ -295,7 +295,7 @@ protected static DataPipeline PrepareDocumentDeletion(string? index, string docu
DocumentId = documentId,
};

return pipeline.Then(Constants.DeleteDocumentPipelineStepName).Build();
return pipeline.Then(Constants.PipelineStepsDeleteDocument).Build();
}

protected async Task UploadFilesAsync(DataPipeline currentPipeline, CancellationToken cancellationToken = default)
Expand Down