From 030990ee90affc390b751343ba1400a656ef92b0 Mon Sep 17 00:00:00 2001 From: Thorsten Sommer Date: Sat, 8 Mar 2025 20:13:08 +0100 Subject: [PATCH] Improved data security for chat threads (#317) --- app/MindWork AI Studio/Chat/ChatThread.cs | 5 ++ .../Chat/ChatThreadExtensions.cs | 58 +++++++++++++++++++ .../Chat/ContentBlockComponent.razor | 2 +- app/MindWork AI Studio/Chat/ContentText.cs | 7 +++ .../Components/ChatComponent.razor | 12 +++- .../Components/ChatComponent.razor.cs | 20 +++++++ .../RAG/RAGProcesses/AISrcSelWithRetCtxVal.cs | 51 ++++++++++++++++ .../wwwroot/changelog/v0.9.32.md | 2 + 8 files changed, 154 insertions(+), 3 deletions(-) create mode 100644 app/MindWork AI Studio/Chat/ChatThreadExtensions.cs diff --git a/app/MindWork AI Studio/Chat/ChatThread.cs b/app/MindWork AI Studio/Chat/ChatThread.cs index 73182e7c..d6c870aa 100644 --- a/app/MindWork AI Studio/Chat/ChatThread.cs +++ b/app/MindWork AI Studio/Chat/ChatThread.cs @@ -45,6 +45,11 @@ public sealed record ChatThread /// public string AugmentedData { get; set; } = string.Empty; + /// + /// The data security to use, derived from the data sources used so far. + /// + public DataSourceSecurity DataSecurity { get; set; } = DataSourceSecurity.NOT_SPECIFIED; + /// /// The name of the chat thread. Usually generated by an AI model or manually edited by the user. /// diff --git a/app/MindWork AI Studio/Chat/ChatThreadExtensions.cs b/app/MindWork AI Studio/Chat/ChatThreadExtensions.cs new file mode 100644 index 00000000..6b1b6500 --- /dev/null +++ b/app/MindWork AI Studio/Chat/ChatThreadExtensions.cs @@ -0,0 +1,58 @@ +using AIStudio.Provider.SelfHosted; +using AIStudio.Settings.DataModel; + +namespace AIStudio.Chat; + +public static class ChatThreadExtensions +{ + /// + /// Checks if the specified provider is allowed for the chat thread. + /// + /// + /// We don't check if the provider is allowed to use the data sources of the chat thread. + /// That kind of check is done in the RAG process itself.

+ /// + /// One thing that is not obvious: once RAG has been used on this thread, the entire chat + /// thread effectively becomes a data source itself, because the augmentation data collected + /// from the data sources is stored in the chat thread. This means we must check whether the + /// selected provider is allowed to use this thread's data. + ///
+ /// The chat thread to check. + /// The provider to check. + /// True, when the provider is allowed for the chat thread. False, otherwise. + public static bool IsLLMProviderAllowed(this ChatThread? chatThread, T provider) + { + // No chat thread available means we have a new chat. That's fine: + if (chatThread is null) + return true; + + // The chat thread is available, but the data security is not specified. + // Means, we never used RAG or RAG was enabled, but no data sources were selected. + // That's fine as well: + if (chatThread.DataSecurity is DataSourceSecurity.NOT_SPECIFIED) + return true; + + // + // Is the provider self-hosted? + // + var isSelfHostedProvider = provider switch + { + ProviderSelfHosted => true, + AIStudio.Settings.Provider p => p.IsSelfHosted, + + _ => false, + }; + + // + // Check the chat data security against the selected provider: + // + return isSelfHostedProvider switch + { + // The provider is self-hosted -- we can use any data source: + true => true, + + // The provider is not self-hosted -- it depends on the data security of the chat thread: + false => chatThread.DataSecurity is not DataSourceSecurity.SELF_HOSTED, + }; + } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Chat/ContentBlockComponent.razor b/app/MindWork AI Studio/Chat/ContentBlockComponent.razor index 826cfdc0..f1c93b65 100644 --- a/app/MindWork AI Studio/Chat/ContentBlockComponent.razor +++ b/app/MindWork AI Studio/Chat/ContentBlockComponent.razor @@ -27,7 +27,7 @@ @if (this.IsLastContentBlock && this.Role is ChatRole.AI && this.RegenerateFunc is not null) { - + } @if (this.RemoveBlockFunc is not null) diff --git a/app/MindWork AI Studio/Chat/ContentText.cs b/app/MindWork AI Studio/Chat/ContentText.cs index 0dccd262..38872edf 100644 --- a/app/MindWork AI Studio/Chat/ContentText.cs +++ b/app/MindWork AI Studio/Chat/ContentText.cs @@ -41,6 +41,13 @@ public sealed class ContentText : IContent if(chatThread is null) return new(); + 
if(!chatThread.IsLLMProviderAllowed(provider)) + { + var logger = Program.SERVICE_PROVIDER.GetService>()!; + logger.LogError("The provider is not allowed for this chat thread due to data security reasons. Skipping the AI process."); + return chatThread; + } + // Call the RAG process. Right now, we only have one RAG process: if (lastPrompt is not null) { diff --git a/app/MindWork AI Studio/Components/ChatComponent.razor b/app/MindWork AI Studio/Components/ChatComponent.razor index 06dd07b2..703d9175 100644 --- a/app/MindWork AI Studio/Components/ChatComponent.razor +++ b/app/MindWork AI Studio/Components/ChatComponent.razor @@ -24,7 +24,7 @@ IsLastContentBlock="@isLastBlock" IsSecondToLastBlock="@isSecondLastBlock" RegenerateFunc="@this.RegenerateBlock" - RegenerateEnabled="@(() => this.IsProviderSelected)" + RegenerateEnabled="@(() => this.IsProviderSelected && this.ChatThread.IsLLMProviderAllowed(this.Provider))" EditLastBlockFunc="@this.EditLastBlock" EditLastUserBlockFunc="@this.EditLastUserBlock"/> } @@ -46,7 +46,7 @@ Adornment="Adornment.End" AdornmentIcon="@Icons.Material.Filled.Send" OnAdornmentClick="() => this.SendMessage()" - ReadOnly="!this.IsProviderSelected || this.isStreaming" + Disabled="@this.IsInputForbidden()" Immediate="@true" OnKeyUp="this.InputKeyEvent" UserAttributes="@USER_INPUT_ATTRIBUTES" @@ -113,6 +113,14 @@ { } + + @if (!this.ChatThread.IsLLMProviderAllowed(this.Provider)) + { + + + + } + \ No newline at end of file diff --git a/app/MindWork AI Studio/Components/ChatComponent.razor.cs b/app/MindWork AI Studio/Components/ChatComponent.razor.cs index d8dee123..f3b6b742 100644 --- a/app/MindWork AI Studio/Components/ChatComponent.razor.cs +++ b/app/MindWork AI Studio/Components/ChatComponent.razor.cs @@ -340,6 +340,20 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable this.earlyDataSourceOptions = updatedOptions; } + private bool IsInputForbidden() + { + if (!this.IsProviderSelected) + return true; + + 
if(this.isStreaming) + return true; + + if(!this.ChatThread.IsLLMProviderAllowed(this.Provider)) + return true; + + return false; + } + private async Task InputKeyEvent(KeyboardEventArgs keyEvent) { if(this.dataSourceSelectionComponent?.IsVisible ?? false) @@ -374,6 +388,9 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable if (!this.IsProviderSelected) return; + if(!this.ChatThread.IsLLMProviderAllowed(this.Provider)) + return; + // We need to blur the focus away from the input field // to be able to clear the field: await this.inputField.BlurAsync(); @@ -776,6 +793,9 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable if(this.ChatThread is null) return; + if(!this.ChatThread.IsLLMProviderAllowed(this.Provider)) + return; + this.ChatThread.Remove(aiBlock, removeForRegenerate: true); this.hasUnsavedChanges = true; this.StateHasChanged(); diff --git a/app/MindWork AI Studio/Tools/RAG/RAGProcesses/AISrcSelWithRetCtxVal.cs b/app/MindWork AI Studio/Tools/RAG/RAGProcesses/AISrcSelWithRetCtxVal.cs index ae7c078b..2736f7b0 100644 --- a/app/MindWork AI Studio/Tools/RAG/RAGProcesses/AISrcSelWithRetCtxVal.cs +++ b/app/MindWork AI Studio/Tools/RAG/RAGProcesses/AISrcSelWithRetCtxVal.cs @@ -1,6 +1,7 @@ using AIStudio.Chat; using AIStudio.Provider; using AIStudio.Settings; +using AIStudio.Settings.DataModel; using AIStudio.Tools.RAG.AugmentationProcesses; using AIStudio.Tools.RAG.DataSourceSelectionProcesses; using AIStudio.Tools.Services; @@ -96,6 +97,56 @@ public sealed class AISrcSelWithRetCtxVal : IRagProcess logger.LogWarning("No data sources are selected. The RAG process is skipped."); proceedWithRAG = false; } + else + { + var previousDataSecurity = chatThread.DataSecurity; + + // + // Update the data security of the chat thread. 
We consider the current data security + // of the chat thread and the data security of the selected data sources: + // + var dataSecurityRestrictedToSelfHosted = selectedDataSources.Any(x => x.SecurityPolicy is DataSourceSecurity.SELF_HOSTED); + chatThread.DataSecurity = dataSecurityRestrictedToSelfHosted switch + { + // + // + // Case: the data sources which are selected have a security policy + // of SELF_HOSTED (at least one data source). + // + // When the policy was already set to ALLOW_ANY, we restrict it + // to SELF_HOSTED. + // + true => DataSourceSecurity.SELF_HOSTED, + + // + // Case: the data sources which are selected have a security policy + // of ALLOW_ANY (none of the data sources has a SELF_HOSTED policy). + // + // When the policy was already set to SELF_HOSTED, we must keep that. + // + false => chatThread.DataSecurity switch + { + // + // When the policy was not specified yet, we set it to ALLOW_ANY. + // + DataSourceSecurity.NOT_SPECIFIED => DataSourceSecurity.ALLOW_ANY, + DataSourceSecurity.ALLOW_ANY => DataSourceSecurity.ALLOW_ANY, + + // + // When the policy was already set to SELF_HOSTED, we must keep that. + // This is important since the thread might already contain data + // from a data source with a SELF_HOSTED policy. + // + DataSourceSecurity.SELF_HOSTED => DataSourceSecurity.SELF_HOSTED, + + // Default case: we use the current data security of the chat thread. 
+ _ => chatThread.DataSecurity, + } + }; + + if (previousDataSecurity != chatThread.DataSecurity) + logger.LogInformation($"The data security of the chat thread was updated from '{previousDataSecurity}' to '{chatThread.DataSecurity}'."); + } // // Trigger the retrieval part of the (R)AG process: diff --git a/app/MindWork AI Studio/wwwroot/changelog/v0.9.32.md b/app/MindWork AI Studio/wwwroot/changelog/v0.9.32.md index 5ff89780..fa8328e3 100644 --- a/app/MindWork AI Studio/wwwroot/changelog/v0.9.32.md +++ b/app/MindWork AI Studio/wwwroot/changelog/v0.9.32.md @@ -1,5 +1,6 @@ # v0.9.32, build 207 (2025-03-xx xx:xx UTC) - Added the "Community & Code" section to the about page. It includes links to the GitHub repositories and the project website. +- Improved data security by preventing the use of cloud LLMs after confidential data has been retrieved previously. - Improved the ERI client to expect JSON responses and send JSON requests using camel case. - Improved the ERI client to raise an error when the server responds with additional JSON data that is not expected. - Improved the error handling in the ERI data source info dialog in cases where servers respond with an invalid message. @@ -9,4 +10,5 @@ - Fixed the chat thread we use for the data retrieval by removing the last block, which is meant to be for the final AI answer. - Fixed the data source name for ERI data sources when performing data retrieval. - Fixed the default data source selection when replacing the current chat with a new one. +- Fixed the state of the re-generate button in the chat thread, when no provider is selected or the data security is preventing the use of cloud LLMs. - Upgraded code dependencies. \ No newline at end of file