From 66232dad1aa09ee9d548300bb8a64d39dda85198 Mon Sep 17 00:00:00 2001 From: Thorsten Sommer Date: Tue, 3 Dec 2024 14:25:42 +0100 Subject: [PATCH] Implemented embedding provider configuration --- .../Dialogs/EmbeddingDialog.razor | 118 ++++++++ .../Dialogs/EmbeddingDialog.razor.cs | 258 ++++++++++++++++++ app/MindWork AI Studio/Pages/Settings.razor | 67 +++++ .../Pages/Settings.razor.cs | 98 +++++++ .../Settings/DataModel/Data.cs | 10 + .../Settings/EmbeddingProvider.cs | 32 +++ 6 files changed, 583 insertions(+) create mode 100644 app/MindWork AI Studio/Dialogs/EmbeddingDialog.razor create mode 100644 app/MindWork AI Studio/Dialogs/EmbeddingDialog.razor.cs create mode 100644 app/MindWork AI Studio/Settings/EmbeddingProvider.cs diff --git a/app/MindWork AI Studio/Dialogs/EmbeddingDialog.razor b/app/MindWork AI Studio/Dialogs/EmbeddingDialog.razor new file mode 100644 index 00000000..e8e34204 --- /dev/null +++ b/app/MindWork AI Studio/Dialogs/EmbeddingDialog.razor @@ -0,0 +1,118 @@ +@using AIStudio.Provider +@using AIStudio.Provider.SelfHosted + + + + + + @* ReSharper disable once CSharpWarnings::CS8974 *@ + + @foreach (LLMProviders provider in Enum.GetValues(typeof(LLMProviders))) + { + if (provider.ProvideEmbeddings()) + { + @provider + } + } + + Create account + + + @* ReSharper disable once CSharpWarnings::CS8974 *@ + + + + + + @foreach (Host host in Enum.GetValues(typeof(Host))) + { + if (host.AreEmbeddingsSupported()) + { + @host.Name() + } + } + + + + @if (this.DataLLMProvider.IsEmbeddingModelProvidedManually()) + { + + } + else + { + Load + + @foreach (var model in this.availableModels) + { + @model + } + + } + + + @* ReSharper disable once CSharpWarnings::CS8974 *@ + + + + + + + Cancel + + @if(this.IsEditing) + { + @:Update + } + else + { + @:Add + } + + + \ No newline at end of file diff --git a/app/MindWork AI Studio/Dialogs/EmbeddingDialog.razor.cs b/app/MindWork AI Studio/Dialogs/EmbeddingDialog.razor.cs new file mode 100644 index 00000000..5494ac0c --- 
/dev/null +++ b/app/MindWork AI Studio/Dialogs/EmbeddingDialog.razor.cs @@ -0,0 +1,258 @@
+using AIStudio.Provider;
+using AIStudio.Settings;
+using AIStudio.Tools.Validation;
+
+using Microsoft.AspNetCore.Components;
+
+using Host = AIStudio.Provider.SelfHosted.Host;
+
+namespace AIStudio.Dialogs;
+
+/// <summary>
+/// Code-behind of the dialog used to add or edit an embedding provider.
+/// On confirmation, the dialog closes with the resulting <see cref="EmbeddingProvider"/>
+/// as its result; the caller is responsible for persisting it.
+/// </summary>
+public partial class EmbeddingDialog : ComponentBase, ISecretId
+{
+    [CascadingParameter]
+    private MudDialogInstance MudDialog { get; set; } = null!;
+
+    /// <summary>
+    /// The embedding's number in the list.
+    /// </summary>
+    [Parameter]
+    public uint DataNum { get; set; }
+
+    /// <summary>
+    /// The embedding's ID.
+    /// </summary>
+    [Parameter]
+    public string DataId { get; set; } = Guid.NewGuid().ToString();
+
+    /// <summary>
+    /// The user chosen name.
+    /// </summary>
+    [Parameter]
+    public string DataName { get; set; } = string.Empty;
+
+    /// <summary>
+    /// The chosen hostname for self-hosted providers.
+    /// </summary>
+    [Parameter]
+    public string DataHostname { get; set; } = string.Empty;
+
+    /// <summary>
+    /// The host to use, e.g., llama.cpp.
+    /// </summary>
+    [Parameter]
+    public Host DataHost { get; set; } = Host.NONE;
+
+    /// <summary>
+    /// Is this provider self-hosted?
+    /// </summary>
+    [Parameter]
+    public bool IsSelfHosted { get; set; }
+
+    /// <summary>
+    /// The provider to use.
+    /// </summary>
+    [Parameter]
+    public LLMProviders DataLLMProvider { get; set; } = LLMProviders.NONE;
+
+    /// <summary>
+    /// The embedding model to use.
+    /// </summary>
+    [Parameter]
+    public Model DataModel { get; set; }
+
+    /// <summary>
+    /// Should the dialog be in editing mode?
+    /// </summary>
+    [Parameter]
+    public bool IsEditing { get; init; }
+
+    [Inject]
+    private SettingsManager SettingsManager { get; init; } = null!;
+
+    [Inject]
+    private ILogger Logger { get; init; } = null!;
+
+    [Inject]
+    private RustService RustService { get; init; } = null!;
+
+    // NOTE(review): the type arguments were lost in the flattened patch text;
+    // <string, object?> is presumed here -- confirm against InjectSpellchecking.
+    private static readonly Dictionary<string, object?> SPELLCHECK_ATTRIBUTES = new();
+
+    /// <summary>
+    /// The list of used instance names. We need this to check for uniqueness.
+    /// </summary>
+    private List<string> UsedInstanceNames { get; set; } = [];
+
+    private bool dataIsValid;
+    private string[] dataIssues = [];
+    private string dataAPIKey = string.Empty;
+    private string dataManuallyModel = string.Empty;
+    private string dataAPIKeyStorageIssue = string.Empty;
+    private string dataEditingPreviousInstanceName = string.Empty;
+
+    // We get the form reference from Blazor code to validate it manually.
+    // It stays null until the component has been rendered:
+    private MudForm form = null!;
+
+    // NOTE(review): element type presumed to be Model (the entries are ordered
+    // by their Id below); the type argument was lost in the patch text -- confirm.
+    private readonly List<Model> availableModels = new();
+    private readonly Encryption encryption = Program.ENCRYPTION;
+    private readonly ProviderValidation providerValidation;
+
+    /// <summary>
+    /// Creates the dialog and wires the validation helper to the dialog's
+    /// current state by means of delegates.
+    /// </summary>
+    public EmbeddingDialog()
+    {
+        this.providerValidation = new()
+        {
+            GetProvider = () => this.DataLLMProvider,
+            GetAPIKeyStorageIssue = () => this.dataAPIKeyStorageIssue,
+            GetPreviousInstanceName = () => this.dataEditingPreviousInstanceName,
+            GetUsedInstanceNames = () => this.UsedInstanceNames,
+            GetHost = () => this.DataHost,
+        };
+    }
+
+    /// <summary>
+    /// Builds the embedding provider settings from the current dialog state.
+    /// For self-hosted providers, the manually entered model name is used.
+    /// The hostname is trimmed, and one trailing slash is removed.
+    /// </summary>
+    /// <returns>The settings reflecting the current input.</returns>
+    private EmbeddingProvider CreateEmbeddingProviderSettings()
+    {
+        var cleanedHostname = this.DataHostname.Trim();
+        return new()
+        {
+            Num = this.DataNum,
+            Id = this.DataId,
+            Name = this.DataName,
+            UsedLLMProvider = this.DataLLMProvider,
+            Model = this.DataLLMProvider is LLMProviders.SELF_HOSTED ? new Model(this.dataManuallyModel, null) : this.DataModel,
+            IsSelfHosted = this.DataLLMProvider is LLMProviders.SELF_HOSTED,
+            Hostname = cleanedHostname.EndsWith('/') ? cleanedHostname[..^1] : cleanedHostname,
+            Host = this.DataHost,
+        };
+    }
+
+    #region Overrides of ComponentBase
+
+    /// <summary>
+    /// Prepares the dialog: configures spellchecking, collects the already used
+    /// instance names and, when editing, loads the API key and the models.
+    /// </summary>
+    protected override async Task OnInitializedAsync()
+    {
+        // Configure the spellchecking for the instance name input:
+        this.SettingsManager.InjectSpellchecking(SPELLCHECK_ATTRIBUTES);
+
+        // Load the used instance names:
+        this.UsedInstanceNames = this.SettingsManager.ConfigurationData.EmbeddingProviders.Select(x => x.Name.ToLowerInvariant()).ToList();
+
+        // When editing, we need to load the data:
+        if(this.IsEditing)
+        {
+            this.dataEditingPreviousInstanceName = this.DataName.ToLowerInvariant();
+
+            // When using self-hosted embedding, we must copy the model name:
+            if (this.DataLLMProvider is LLMProviders.SELF_HOSTED)
+                this.dataManuallyModel = this.DataModel.Id;
+
+            //
+            // We cannot load the API key for self-hosted providers:
+            //
+            if (this.DataLLMProvider is LLMProviders.SELF_HOSTED && this.DataHost is not Host.OLLAMA)
+            {
+                await this.ReloadModels();
+                await base.OnInitializedAsync();
+                return;
+            }
+
+            // Load the API key:
+            var requestedSecret = await this.RustService.GetAPIKey(this, isTrying: this.DataLLMProvider is LLMProviders.SELF_HOSTED);
+            if (requestedSecret.Success)
+                this.dataAPIKey = await requestedSecret.Secret.Decrypt(this.encryption);
+            else
+            {
+                this.dataAPIKey = string.Empty;
+                if (this.DataLLMProvider is not LLMProviders.SELF_HOSTED)
+                {
+                    this.dataAPIKeyStorageIssue = $"Failed to load the API key from the operating system. The message was: {requestedSecret.Issue}. You might ignore this message and provide the API key again.";
+
+                    // The form reference is captured while rendering. When the awaits
+                    // above completed synchronously, no render has happened yet and the
+                    // reference is still null. The issue text stays set, so it is
+                    // reported by the next validation instead.
+                    if (this.form is not null)
+                        await this.form.Validate();
+                }
+            }
+
+            await this.ReloadModels();
+        }
+
+        await base.OnInitializedAsync();
+    }
+
+    protected override async Task OnAfterRenderAsync(bool firstRender)
+    {
+        // Reset the validation when not editing and on the first render.
+        // We don't want to show validation errors when the user opens the dialog.
+        if(!this.IsEditing && firstRender)
+            this.form.ResetValidation();
+
+        await base.OnAfterRenderAsync(firstRender);
+    }
+
+    #endregion
+
+    #region Implementation of ISecretId
+
+    /// <summary>
+    /// The secret's ID; this is <see cref="DataId"/>.
+    /// </summary>
+    public string SecretId => this.DataId;
+
+    /// <summary>
+    /// The secret's name; this is <see cref="DataName"/>.
+    /// </summary>
+    public string SecretName => this.DataName;
+
+    #endregion
+
+    /// <summary>
+    /// Validates the form and, when the data is valid, stores a non-empty API key
+    /// through the Rust service and closes the dialog with the provider settings
+    /// as its result. When storing the key fails, the dialog stays open and the
+    /// failure is recorded as the storage issue.
+    /// </summary>
+    private async Task Store()
+    {
+        await this.form.Validate();
+
+        // Clear a previously recorded storage issue after validating,
+        // so that a later attempt starts without it:
+        if (!string.IsNullOrWhiteSpace(this.dataAPIKeyStorageIssue))
+            this.dataAPIKeyStorageIssue = string.Empty;
+
+        // When the data is not valid, we don't store it:
+        if (!this.dataIsValid)
+            return;
+
+        // Use the data model to store the provider.
+        // We just return this data to the parent component:
+        var addedProviderSettings = this.CreateEmbeddingProviderSettings();
+        if (!string.IsNullOrWhiteSpace(this.dataAPIKey))
+        {
+            // Store the API key in the OS secure storage:
+            var storeResponse = await this.RustService.SetAPIKey(this, this.dataAPIKey);
+            if (!storeResponse.Success)
+            {
+                this.dataAPIKeyStorageIssue = $"Failed to store the API key in the operating system. The message was: {storeResponse.Issue}. Please try again.";
+                await this.form.Validate();
+                return;
+            }
+        }
+
+        this.MudDialog.Close(DialogResult.Ok(addedProviderSettings));
+    }
+
+    /// <summary>
+    /// Validates the manually entered model name.
+    /// </summary>
+    /// <param name="manuallyModel">The model name entered by the user.</param>
+    /// <returns>
+    /// An error message when the provider is self-hosted and the name is empty
+    /// or whitespace; otherwise <c>null</c>.
+    /// </returns>
+    private string? ValidateManuallyModel(string manuallyModel)
+    {
+        if (this.DataLLMProvider is LLMProviders.SELF_HOSTED && string.IsNullOrWhiteSpace(manuallyModel))
+            return "Please enter an embedding model name.";
+
+        return null;
+    }
+
+    /// <summary>
+    /// Cancels the dialog.
+    /// </summary>
+    private void Cancel() => this.MudDialog.Cancel();
+
+    /// <summary>
+    /// Reloads the list of available embedding models from the provider that
+    /// results from the current dialog state. Does nothing for a <c>NoProvider</c>.
+    /// </summary>
+    private async Task ReloadModels()
+    {
+        var currentEmbeddingProviderSettings = this.CreateEmbeddingProviderSettings();
+        var provider = currentEmbeddingProviderSettings.CreateProvider(this.Logger);
+        if(provider is NoProvider)
+            return;
+
+        var models = await provider.GetEmbeddingModels(this.dataAPIKey);
+
+        // Order descending by ID means that the newest models probably come first:
+        var orderedModels = models.OrderByDescending(n => n.Id);
+
+        this.availableModels.Clear();
+        this.availableModels.AddRange(orderedModels);
+    }
+
+    /// <summary>
+    /// The label of the API key input; marked as optional for self-hosted providers.
+    /// </summary>
+    private string APIKeyText => this.DataLLMProvider switch
+    {
+        LLMProviders.SELF_HOSTED => "(Optional) API Key",
+        _ => "API Key",
+    };
+
+    /// <summary>
+    /// True when no provider is selected yet.
+    /// </summary>
+    private bool IsNoneProvider => this.DataLLMProvider is LLMProviders.NONE;
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Pages/Settings.razor b/app/MindWork AI Studio/Pages/Settings.razor index e5366386..52d27e59 100644 --- a/app/MindWork AI Studio/Pages/Settings.razor +++ b/app/MindWork AI Studio/Pages/Settings.razor @@ -131,6 +131,73 @@ + @if (this.SettingsManager.ConfigurationData.App.PreviewVisibility >= PreviewVisibility.PROTOTYPE) + { + + + + Configured Embeddings + + + Embeddings are a way to represent words, sentences, entire documents, or even images and videos as digital + fingerprints. Just like each person has a unique fingerprint, embedding models create unique digital patterns + that capture the meaning and characteristics of the content they analyze. When two things are similar in meaning + or content, their digital fingerprints will look very similar. For example, the fingerprints for 'happy' and + 'joyful' would be more alike than those for 'happy' and 'sad'.
+ + + + This helps AI Studio understand and compare things in a way that's similar to how humans do. When you're working on + something, AI Studio can automatically identify related documents and data by comparing their digital fingerprints. + For instance, if you're writing about customer service, AI Studio can instantly find other documents in your data that + discuss similar topics or experiences, even if they use different words. + + + + + + + + + + + # + Name + Provider + Model + Actions + + + @context.Num + @context.Name + @context.UsedLLMProvider + @this.GetEmbeddingProviderModelName(context) + + + + Open Dashboard + + + Edit + + + Delete + + + + + + @if (this.SettingsManager.ConfigurationData.EmbeddingProviders.Count == 0) + { + No embeddings configured yet. + } + + + Add Embedding + + + } + Your Profiles diff --git a/app/MindWork AI Studio/Pages/Settings.razor.cs b/app/MindWork AI Studio/Pages/Settings.razor.cs index e338a67f..a59859b3 100644 --- a/app/MindWork AI Studio/Pages/Settings.razor.cs +++ b/app/MindWork AI Studio/Pages/Settings.razor.cs @@ -26,6 +26,7 @@ public partial class Settings : ComponentBase, IMessageBusReceiver, IDisposable private RustService RustService { get; init; } = null!; private readonly List> availableLLMProviders = new(); + private readonly List> availableEmbeddingProviders = new(); #region Overrides of ComponentBase @@ -160,6 +161,103 @@ public partial class Settings : ComponentBase, IMessageBusReceiver, IDisposable #endregion
+    #region Embedding provider related
+
+    /// <summary>
+    /// Returns the model name of the given embedding provider for display.
+    /// Names longer than 36 characters are shortened to their last 36
+    /// characters, prefixed with "[...] ".
+    /// </summary>
+    /// <param name="provider">The embedding provider whose model name is shown.</param>
+    /// <returns>The full or shortened model name.</returns>
+    private string GetEmbeddingProviderModelName(EmbeddingProvider provider)
+    {
+        const int MAX_LENGTH = 36;
+        var modelName = provider.Model.ToString();
+        if (modelName.Length <= MAX_LENGTH)
+            return modelName;
+
+        // The name is longer than MAX_LENGTH here, so the slice is always valid:
+        return "[...] " + modelName[^MAX_LENGTH..];
+    }
+
+    /// <summary>
+    /// Opens the embedding dialog in adding mode. Unless the dialog was canceled,
+    /// the new provider gets the next embedding number, is added to the
+    /// configuration, the settings are stored, and a configuration change is announced.
+    /// </summary>
+    private async Task AddEmbeddingProvider()
+    {
+        var dialogParameters = new DialogParameters<EmbeddingDialog>
+        {
+            { x => x.IsEditing, false },
+        };
+
+        var dialogReference = await this.DialogService.ShowAsync<EmbeddingDialog>("Add Embedding Provider", dialogParameters, DialogOptions.FULLSCREEN);
+        var dialogResult = await dialogReference.Result;
+        if (dialogResult is null || dialogResult.Canceled)
+            return;
+
+        var addedEmbedding = (EmbeddingProvider)dialogResult.Data!;
+        addedEmbedding = addedEmbedding with { Num = this.SettingsManager.ConfigurationData.NextEmbeddingNum++ };
+
+        this.SettingsManager.ConfigurationData.EmbeddingProviders.Add(addedEmbedding);
+        this.UpdateEmbeddingProviders();
+
+        await this.SettingsManager.StoreSettings();
+        await this.MessageBus.SendMessage(this, Event.CONFIGURATION_CHANGED);
+    }
+
+    /// <summary>
+    /// Opens the embedding dialog in editing mode, pre-filled with the given provider.
+    /// Unless the dialog was canceled, the provider is replaced in the configuration,
+    /// the settings are stored, and a configuration change is announced.
+    /// </summary>
+    /// <param name="embeddingProvider">The configured provider to edit.</param>
+    private async Task EditEmbeddingProvider(EmbeddingProvider embeddingProvider)
+    {
+        var dialogParameters = new DialogParameters<EmbeddingDialog>
+        {
+            { x => x.DataNum, embeddingProvider.Num },
+            { x => x.DataId, embeddingProvider.Id },
+            { x => x.DataName, embeddingProvider.Name },
+            { x => x.DataLLMProvider, embeddingProvider.UsedLLMProvider },
+            { x => x.DataModel, embeddingProvider.Model },
+            { x => x.DataHostname, embeddingProvider.Hostname },
+            { x => x.IsSelfHosted, embeddingProvider.IsSelfHosted },
+            { x => x.IsEditing, true },
+            { x => x.DataHost, embeddingProvider.Host },
+        };
+
+        var dialogReference = await this.DialogService.ShowAsync<EmbeddingDialog>("Edit Embedding Provider", dialogParameters, DialogOptions.FULLSCREEN);
+        var dialogResult = await dialogReference.Result;
+        if (dialogResult is null || dialogResult.Canceled)
+            return;
+
+        var editedEmbeddingProvider = (EmbeddingProvider)dialogResult.Data!;
+
+        // Locate the entry before touching anything. IndexOf yields -1 when the
+        // entry is no longer in the list; using that as an index would throw:
+        var embeddingProviders = this.SettingsManager.ConfigurationData.EmbeddingProviders;
+        var index = embeddingProviders.IndexOf(embeddingProvider);
+        if (index < 0)
+            return;
+
+        // Assign the next embedding number when the edited provider has none:
+        if(editedEmbeddingProvider.Num == 0)
+            editedEmbeddingProvider = editedEmbeddingProvider with { Num = this.SettingsManager.ConfigurationData.NextEmbeddingNum++ };
+
+        embeddingProviders[index] = editedEmbeddingProvider;
+        this.UpdateEmbeddingProviders();
+
+        await this.SettingsManager.StoreSettings();
+        await this.MessageBus.SendMessage(this, Event.CONFIGURATION_CHANGED);
+    }
+
+    /// <summary>
+    /// Asks for confirmation and then deletes the given embedding provider.
+    /// The provider is removed from the configuration only when deleting its
+    /// API key succeeded. A configuration change is announced in either case.
+    /// </summary>
+    /// <param name="provider">The configured provider to delete.</param>
+    private async Task DeleteEmbeddingProvider(EmbeddingProvider provider)
+    {
+        var dialogParameters = new DialogParameters
+        {
+            { "Message", $"Are you sure you want to delete the embedding provider '{provider.Name}'?" },
+        };
+
+        // NOTE(review): the dialog component's type argument was lost in the
+        // flattened patch text and cannot be determined from here -- restore it.
+        var dialogReference = await this.DialogService.ShowAsync("Delete Embedding Provider", dialogParameters, DialogOptions.FULLSCREEN);
+        var dialogResult = await dialogReference.Result;
+        if (dialogResult is null || dialogResult.Canceled)
+            return;
+
+        var deleteSecretResponse = await this.RustService.DeleteAPIKey(provider);
+        if(deleteSecretResponse.Success)
+        {
+            this.SettingsManager.ConfigurationData.EmbeddingProviders.Remove(provider);
+            await this.SettingsManager.StoreSettings();
+        }
+
+        this.UpdateEmbeddingProviders();
+        await this.MessageBus.SendMessage(this, Event.CONFIGURATION_CHANGED);
+    }
+
+    /// <summary>
+    /// Rebuilds the list of available embedding providers (name and ID per
+    /// entry) from the configuration.
+    /// </summary>
+    private void UpdateEmbeddingProviders()
+    {
+        this.availableEmbeddingProviders.Clear();
+        foreach (var provider in this.SettingsManager.ConfigurationData.EmbeddingProviders)
+            this.availableEmbeddingProviders.Add(new (provider.Name, provider.Id));
+    }
+
+    #endregion
+
 #region Profile related private async Task AddProfile() diff --git a/app/MindWork AI Studio/Settings/DataModel/Data.cs b/app/MindWork AI Studio/Settings/DataModel/Data.cs index 59ceaae9..e2b678e8 100644 --- a/app/MindWork AI Studio/Settings/DataModel/Data.cs +++ b/app/MindWork AI Studio/Settings/DataModel/Data.cs @@ -20,6 +20,11 @@ public sealed class Data /// Settings concerning the LLM
providers. /// public DataLLMProviders LLMProviders { get; init; } = new(); + + /// + /// A collection of embedding providers configured. + /// + public List EmbeddingProviders { get; init; } = []; /// /// List of configured profiles. @@ -31,6 +36,11 @@ public sealed class Data /// public uint NextProviderNum { get; set; } = 1; + /// + /// The next embedding number to use. + /// + public uint NextEmbeddingNum { get; set; } = 1; + /// /// The next profile number to use. /// diff --git a/app/MindWork AI Studio/Settings/EmbeddingProvider.cs b/app/MindWork AI Studio/Settings/EmbeddingProvider.cs new file mode 100644 index 00000000..126a0be2 --- /dev/null +++ b/app/MindWork AI Studio/Settings/EmbeddingProvider.cs @@ -0,0 +1,32 @@
+using System.Text.Json.Serialization;
+
+using AIStudio.Provider;
+
+using Host = AIStudio.Provider.SelfHosted.Host;
+
+namespace AIStudio.Settings;
+
+/// <summary>
+/// The settings of one configured embedding provider.
+/// </summary>
+/// <param name="Num">The provider's number.</param>
+/// <param name="Id">The provider's ID; also exposed as the secret ID.</param>
+/// <param name="Name">The provider's name; also exposed as the secret name.</param>
+/// <param name="UsedLLMProvider">The LLM provider used for the embeddings.</param>
+/// <param name="Model">The embedding model.</param>
+/// <param name="IsSelfHosted">Whether the provider is self-hosted.</param>
+/// <param name="Hostname">The hostname; defaults to "http://localhost:1234".</param>
+/// <param name="Host">The host; defaults to <c>Host.NONE</c>.</param>
+public readonly record struct EmbeddingProvider(
+    uint Num,
+    string Id,
+    string Name,
+    LLMProviders UsedLLMProvider,
+    Model Model,
+    bool IsSelfHosted = false,
+    string Hostname = "http://localhost:1234",
+    Host Host = Host.NONE) : ISecretId
+{
+    /// <summary>
+    /// Returns the provider's name.
+    /// </summary>
+    public override string ToString()
+    {
+        return this.Name;
+    }
+
+    #region Implementation of ISecretId
+
+    /// <summary>
+    /// The secret's ID; this is the provider's <see cref="Id"/>. Not serialized to JSON.
+    /// </summary>
+    [JsonIgnore]
+    public string SecretId
+    {
+        get { return this.Id; }
+    }
+
+    /// <summary>
+    /// The secret's name; this is the provider's <see cref="Name"/>. Not serialized to JSON.
+    /// </summary>
+    [JsonIgnore]
+    public string SecretName
+    {
+        get { return this.Name; }
+    }
+
+    #endregion
+}
\ No newline at end of file