From 030990ee90affc390b751343ba1400a656ef92b0 Mon Sep 17 00:00:00 2001
From: Thorsten Sommer <SommerEngineering@users.noreply.github.com>
Date: Sat, 8 Mar 2025 20:13:08 +0100
Subject: [PATCH] Improved data security for chat threads (#317)

---
 app/MindWork AI Studio/Chat/ChatThread.cs     |  5 ++
 .../Chat/ChatThreadExtensions.cs              | 58 +++++++++++++++++++
 .../Chat/ContentBlockComponent.razor          |  2 +-
 app/MindWork AI Studio/Chat/ContentText.cs    |  7 +++
 .../Components/ChatComponent.razor            | 12 +++-
 .../Components/ChatComponent.razor.cs         | 20 +++++++
 .../RAG/RAGProcesses/AISrcSelWithRetCtxVal.cs | 51 ++++++++++++++++
 .../wwwroot/changelog/v0.9.32.md              |  2 +
 8 files changed, 154 insertions(+), 3 deletions(-)
 create mode 100644 app/MindWork AI Studio/Chat/ChatThreadExtensions.cs
diff --git a/app/MindWork AI Studio/Chat/ChatThread.cs b/app/MindWork AI Studio/Chat/ChatThread.cs
index 73182e7c..d6c870aa 100644
--- a/app/MindWork AI Studio/Chat/ChatThread.cs	
+++ b/app/MindWork AI Studio/Chat/ChatThread.cs	
@@ -45,6 +45,11 @@ public sealed record ChatThread
     /// </summary>
     public string AugmentedData { get; set; } = string.Empty;
 
+    /// <summary>
+    /// The data security of this thread, tightened by the RAG process from the selected data sources and checked before a provider may be used.
+    /// </summary>
+    public DataSourceSecurity DataSecurity { get; set; } = DataSourceSecurity.NOT_SPECIFIED;
+
     /// <summary>
     /// The name of the chat thread. Usually generated by an AI model or manually edited by the user.
     /// </summary>
diff --git a/app/MindWork AI Studio/Chat/ChatThreadExtensions.cs b/app/MindWork AI Studio/Chat/ChatThreadExtensions.cs
new file mode 100644
index 00000000..6b1b6500
--- /dev/null
+++ b/app/MindWork AI Studio/Chat/ChatThreadExtensions.cs	
@@ -0,0 +1,58 @@
+using AIStudio.Provider.SelfHosted;
+using AIStudio.Settings.DataModel;
+
+namespace AIStudio.Chat;
+
+public static class ChatThreadExtensions
+{
+    /// <summary>
+    /// Determines whether the given LLM provider may be used with the chat thread.
+    /// </summary>
+    /// <remarks>
+    /// This check does not cover whether the provider may access the thread's data
+    /// sources; the RAG process is responsible for that.<br/><br/>
+    ///
+    /// A less obvious point: once RAG ran on a thread, the thread itself acts like a
+    /// data source, because the augmentation data gathered from the data sources is
+    /// stored in it. Hence, the selected provider must be permitted to see the
+    /// thread's data.
+    /// </remarks>
+    /// <typeparam name="T">The type of the provider object.</typeparam>
+    /// <param name="chatThread">The chat thread to check; null stands for a new chat.</param>
+    /// <param name="provider">The provider to check.</param>
+    /// <returns>True, when the provider is allowed for the chat thread. False, otherwise.</returns>
+    public static bool IsLLMProviderAllowed<T>(this ChatThread? chatThread, T provider)
+    {
+        // Without a chat thread we are dealing with a new chat, which cannot
+        // contain any restricted data yet:
+        if (chatThread is null)
+            return true;
+
+        // An unspecified data security means no data source contributed so far
+        // -- any provider is acceptable:
+        var threadSecurity = chatThread.DataSecurity;
+        if (threadSecurity is DataSourceSecurity.NOT_SPECIFIED)
+            return true;
+
+        //
+        // Determine whether the provider is self-hosted. Unknown provider
+        // types are treated as not self-hosted:
+        //
+        bool runsSelfHosted;
+        if (provider is ProviderSelfHosted)
+            runsSelfHosted = true;
+        else if (provider is AIStudio.Settings.Provider configuredProvider)
+            runsSelfHosted = configuredProvider.IsSelfHosted;
+        else
+            runsSelfHosted = false;
+
+        //
+        // A self-hosted provider may process any thread. Any other provider is
+        // only allowed while the thread is not restricted to self-hosted ones:
+        //
+        if (runsSelfHosted)
+            return true;
+
+        return threadSecurity is not DataSourceSecurity.SELF_HOSTED;
+    }
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Chat/ContentBlockComponent.razor b/app/MindWork AI Studio/Chat/ContentBlockComponent.razor
index 826cfdc0..f1c93b65 100644
--- a/app/MindWork AI Studio/Chat/ContentBlockComponent.razor	
+++ b/app/MindWork AI Studio/Chat/ContentBlockComponent.razor	
@@ -27,7 +27,7 @@
             @if (this.IsLastContentBlock && this.Role is ChatRole.AI && this.RegenerateFunc is not null)
             {
                 <MudTooltip Text="Regenerate" Placement="Placement.Bottom">
-                    <MudIconButton Icon="@Icons.Material.Filled.Recycling" Color="Color.Default" OnClick="@this.RegenerateBlock"/>
+                    <MudIconButton Icon="@Icons.Material.Filled.Recycling" Color="Color.Default" Disabled="@(!this.RegenerateEnabled())" OnClick="@this.RegenerateBlock"/>
                 </MudTooltip>
             }
             @if (this.RemoveBlockFunc is not null)
diff --git a/app/MindWork AI Studio/Chat/ContentText.cs b/app/MindWork AI Studio/Chat/ContentText.cs
index 0dccd262..38872edf 100644
--- a/app/MindWork AI Studio/Chat/ContentText.cs	
+++ b/app/MindWork AI Studio/Chat/ContentText.cs	
@@ -41,6 +41,13 @@ public sealed class ContentText : IContent
         if(chatThread is null)
             return new();
         
+        if(!chatThread.IsLLMProviderAllowed(provider))
+        {
+            var logger = Program.SERVICE_PROVIDER.GetService<ILogger<ContentText>>()!;
+            logger.LogError("The provider is not allowed for this chat thread due to data security reasons. Skipping the AI process.");
+            return chatThread;
+        }
+
         // Call the RAG process. Right now, we only have one RAG process:
         if (lastPrompt is not null)
         {
diff --git a/app/MindWork AI Studio/Components/ChatComponent.razor b/app/MindWork AI Studio/Components/ChatComponent.razor
index 06dd07b2..703d9175 100644
--- a/app/MindWork AI Studio/Components/ChatComponent.razor	
+++ b/app/MindWork AI Studio/Components/ChatComponent.razor	
@@ -24,7 +24,7 @@
                         IsLastContentBlock="@isLastBlock"
                         IsSecondToLastBlock="@isSecondLastBlock"
                         RegenerateFunc="@this.RegenerateBlock"
-                        RegenerateEnabled="@(() => this.IsProviderSelected)"
+                        RegenerateEnabled="@(() => this.IsProviderSelected && this.ChatThread.IsLLMProviderAllowed(this.Provider))"
                         EditLastBlockFunc="@this.EditLastBlock"
                         EditLastUserBlockFunc="@this.EditLastUserBlock"/>
                 }
@@ -46,7 +46,7 @@
                 Adornment="Adornment.End"
                 AdornmentIcon="@Icons.Material.Filled.Send"
                 OnAdornmentClick="() => this.SendMessage()"
-                ReadOnly="!this.IsProviderSelected || this.isStreaming"
+                Disabled="@this.IsInputForbidden()"
                 Immediate="@true"
                 OnKeyUp="this.InputKeyEvent"
                 UserAttributes="@USER_INPUT_ATTRIBUTES"
@@ -113,6 +113,14 @@
             {
                 <DataSourceSelection @ref="@this.dataSourceSelectionComponent" PopoverTriggerMode="PopoverTriggerMode.BUTTON" PopoverButtonClasses="ma-3" LLMProvider="@this.Provider" DataSourceOptions="@this.GetCurrentDataSourceOptions()" DataSourceOptionsChanged="@(async options => await this.SetCurrentDataSourceOptions(options))" DataSourcesAISelected="@this.GetAgentSelectedDataSources()"/>
             }
+            
+            @if (!this.ChatThread.IsLLMProviderAllowed(this.Provider))
+            {
+                <MudTooltip Text="The selected provider is not allowed in this chat due to data security reasons." Placement="@TOOLBAR_TOOLTIP_PLACEMENT">
+                    <MudIconButton Icon="@Icons.Material.Filled.Error" Color="Color.Error"/>
+                </MudTooltip>
+            }
+            <MudIconButton />
         </MudToolBar>
     </FooterContent>
 </InnerScrolling>
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Components/ChatComponent.razor.cs b/app/MindWork AI Studio/Components/ChatComponent.razor.cs
index d8dee123..f3b6b742 100644
--- a/app/MindWork AI Studio/Components/ChatComponent.razor.cs	
+++ b/app/MindWork AI Studio/Components/ChatComponent.razor.cs	
@@ -340,6 +340,20 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable
             this.earlyDataSourceOptions = updatedOptions;
     }
 
+    /// <summary>
+    /// Tells whether the user input must be blocked right now.
+    /// </summary>
+    /// <returns>
+    /// True, when no provider is selected, the component is streaming, or the
+    /// selected provider is not allowed for the chat thread. False, otherwise.
+    /// </returns>
+    private bool IsInputForbidden()
+    {
+        // The plain state checks run first; the provider check only when needed:
+        var inputPossible = this.IsProviderSelected && !this.isStreaming;
+        return !inputPossible || !this.ChatThread.IsLLMProviderAllowed(this.Provider);
+    }
+
     private async Task InputKeyEvent(KeyboardEventArgs keyEvent)
     {
         if(this.dataSourceSelectionComponent?.IsVisible ?? false)
@@ -374,6 +388,9 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable
         if (!this.IsProviderSelected)
             return;
         
+        if(!this.ChatThread.IsLLMProviderAllowed(this.Provider))
+            return;
+        
         // We need to blur the focus away from the input field
         // to be able to clear the field:
         await this.inputField.BlurAsync();
@@ -776,6 +793,9 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable
         if(this.ChatThread is null)
             return;
         
+        if(!this.ChatThread.IsLLMProviderAllowed(this.Provider))
+            return;
+        
         this.ChatThread.Remove(aiBlock, removeForRegenerate: true);
         this.hasUnsavedChanges = true;
         this.StateHasChanged();
diff --git a/app/MindWork AI Studio/Tools/RAG/RAGProcesses/AISrcSelWithRetCtxVal.cs b/app/MindWork AI Studio/Tools/RAG/RAGProcesses/AISrcSelWithRetCtxVal.cs
index ae7c078b..2736f7b0 100644
--- a/app/MindWork AI Studio/Tools/RAG/RAGProcesses/AISrcSelWithRetCtxVal.cs	
+++ b/app/MindWork AI Studio/Tools/RAG/RAGProcesses/AISrcSelWithRetCtxVal.cs	
@@ -1,6 +1,7 @@
 using AIStudio.Chat;
 using AIStudio.Provider;
 using AIStudio.Settings;
+using AIStudio.Settings.DataModel;
 using AIStudio.Tools.RAG.AugmentationProcesses;
 using AIStudio.Tools.RAG.DataSourceSelectionProcesses;
 using AIStudio.Tools.Services;
@@ -96,6 +97,56 @@ public sealed class AISrcSelWithRetCtxVal : IRagProcess
                 logger.LogWarning("No data sources are selected. The RAG process is skipped.");
                 proceedWithRAG = false;
             }
+            else
+            {
+                var previousDataSecurity = chatThread.DataSecurity;
+                
+                //
+                // Update the data security of the chat thread. We consider the current data security
+                // of the chat thread and the data security of the selected data sources:
+                //
+                var dataSecurityRestrictedToSelfHosted = selectedDataSources.Any(x => x.SecurityPolicy is DataSourceSecurity.SELF_HOSTED);
+                chatThread.DataSecurity = dataSecurityRestrictedToSelfHosted switch
+                {
+                    //
+                    //
+                    // Case: the data sources which are selected have a security policy
+                    // of SELF_HOSTED (at least one data source).
+                    //
+                    // When the policy was already set to ALLOW_ANY, we restrict it
+                    // to SELF_HOSTED.
+                    //
+                    true => DataSourceSecurity.SELF_HOSTED,
+                    
+                    //
+                    // Case: the data sources which are selected have a security policy
+                    // of ALLOW_ANY (none of the data sources has a SELF_HOSTED policy).
+                    //
+                    // When the policy was already set to SELF_HOSTED, we must keep that.
+                    //
+                    false => chatThread.DataSecurity switch
+                    {
+                        //
+                        // When the policy was not specified yet, we set it to ALLOW_ANY.
+                        //
+                        DataSourceSecurity.NOT_SPECIFIED => DataSourceSecurity.ALLOW_ANY,
+                        DataSourceSecurity.ALLOW_ANY => DataSourceSecurity.ALLOW_ANY,
+                        
+                        //
+                        // When the policy was already set to SELF_HOSTED, we must keep that.
+                        // This is important since the thread might already contain data
+                        // from a data source with a SELF_HOSTED policy.
+                        //
+                        DataSourceSecurity.SELF_HOSTED => DataSourceSecurity.SELF_HOSTED,
+                        
+                        // Default case: we use the current data security of the chat thread.
+                        _ => chatThread.DataSecurity,
+                    }
+                };
+                
+                if (previousDataSecurity != chatThread.DataSecurity)
+                    logger.LogInformation($"The data security of the chat thread was updated from '{previousDataSecurity}' to '{chatThread.DataSecurity}'.");
+            }
             
             //
             // Trigger the retrieval part of the (R)AG process:
diff --git a/app/MindWork AI Studio/wwwroot/changelog/v0.9.32.md b/app/MindWork AI Studio/wwwroot/changelog/v0.9.32.md
index 5ff89780..fa8328e3 100644
--- a/app/MindWork AI Studio/wwwroot/changelog/v0.9.32.md	
+++ b/app/MindWork AI Studio/wwwroot/changelog/v0.9.32.md	
@@ -1,5 +1,6 @@
 ﻿# v0.9.32, build 207 (2025-03-xx xx:xx UTC)
 - Added the "Community & Code" section to the about page. It includes links to the GitHub repositories and the project website.
+- Improved data security by preventing the use of cloud LLMs after confidential data has been retrieved previously.
 - Improved the ERI client to expect JSON responses and send JSON requests using camel case.
 - Improved the ERI client to raise an error when the server responds with additional JSON data that is not expected.
 - Improved the error handling in the ERI data source info dialog in cases where servers respond with an invalid message.
@@ -9,4 +10,5 @@
 - Fixed the chat thread we use for the data retrieval by removing the last block, which is meant to be for the final AI answer.
 - Fixed the data source name for ERI data sources when performing data retrieval.
 - Fixed the default data source selection when replacing the current chat with a new one.
+- Fixed the state of the re-generate button in the chat thread when no provider is selected or the data security prevents the use of cloud LLMs.
 - Upgraded code dependencies.
\ No newline at end of file