Added agent architecture & first agent

2025-11-07 09:00:22 +00:00 · 2024-07-30 20:35:54 +02:00 · 2024-07-30 20:35:54 +02:00 · 9b4a2c1d49
commit 9b4a2c1d49
parent 6065728128
12 changed files with 297 additions and 1 deletions
--- a/Studio/Agents/AgentBase.cs
+++ b/Studio/Agents/AgentBase.cs
@ -0,0 +1,109 @@
+using AIStudio.Chat;
+using AIStudio.Provider;
+using AIStudio.Settings;
+using AIStudio.Tools;
+
+// ReSharper disable MemberCanBePrivate.Global
+
+namespace AIStudio.Agents;
+
+/// <summary>
+/// Shared foundation for agents. It keeps the injected services and provides helpers
+/// to create a private chat thread, append a user request to it, and append the
+/// provider's answer.
+/// </summary>
+public abstract class AgentBase(SettingsManager settingsManager, IJSRuntime jsRuntime, ThreadSafeRandom rng) : IAgent
+{
+    /// <summary>
+    /// The settings manager handed over to the provider call.
+    /// </summary>
+    protected SettingsManager SettingsManager { get; init; } = settingsManager;
+
+    /// <summary>
+    /// The JS runtime handed over to the provider call.
+    /// </summary>
+    protected IJSRuntime JsRuntime { get; init; } = jsRuntime;
+
+    /// <summary>
+    /// Random number source; used to seed newly created chat threads.
+    /// </summary>
+    protected ThreadSafeRandom RNG { get; init; } = rng;
+
+    /// <summary>
+    /// The kind of agent this is (see <see cref="Agents.Type"/>).
+    /// </summary>
+    protected abstract Type Type { get; }
+
+    /// <summary>
+    /// The agent's name.
+    /// </summary>
+    public abstract string Id { get; }
+
+    /// <summary>
+    /// Describes the agent's job. Serves as the basis for the system prompt.
+    /// </summary>
+    protected abstract string JobDescription { get; }
+
+    /// <summary>
+    /// Builds the system prompt for this agent.
+    /// </summary>
+    /// <param name="additionalData">Extra data the concrete agent may weave into the prompt.</param>
+    /// <returns>The system prompt text.</returns>
+    protected abstract string SystemPrompt(string additionalData);
+
+    #region Implementation of IAgent
+
+    /// <inheritdoc />
+    public abstract AIStudio.Settings.Provider? ProviderSettings { get; set; }
+
+    /// <inheritdoc />
+    public abstract Task<ChatThread> ProcessContext(ChatThread chatThread, IDictionary<string, string> additionalData);
+
+    /// <inheritdoc />
+    public abstract Task<ContentBlock> ProcessInput(ContentBlock input, IDictionary<string, string> additionalData);
+
+    /// <inheritdoc />
+    public abstract Task<bool> MadeDecision(ContentBlock input);
+
+    /// <inheritdoc />
+    public abstract IReadOnlyCollection<ContentBlock> GetContext();
+
+    /// <inheritdoc />
+    public abstract IReadOnlyCollection<ContentBlock> GetAnswers();
+
+    #endregion
+
+    /// <summary>
+    /// Creates an empty, unnamed chat thread that is not bound to a workspace.
+    /// </summary>
+    /// <param name="systemPrompt">The system prompt to store on the thread.</param>
+    /// <returns>A new thread with a fresh chat id, a random seed, and no blocks.</returns>
+    protected ChatThread CreateChatThread(string systemPrompt)
+    {
+        var thread = new ChatThread
+        {
+            WorkspaceId = Guid.Empty,
+            ChatId = Guid.NewGuid(),
+            Name = string.Empty,
+            Seed = this.RNG.Next(),
+            SystemPrompt = systemPrompt,
+            Blocks = [],
+        };
+
+        return thread;
+    }
+
+    /// <summary>
+    /// Appends a user text block carrying <paramref name="request"/> to the thread.
+    /// </summary>
+    /// <param name="thread">The thread that receives the new block.</param>
+    /// <param name="request">The text of the user's request.</param>
+    /// <returns>The timestamp stored on the new block, so a response can reuse it.</returns>
+    protected DateTimeOffset AddUserRequest(ChatThread thread, string request)
+    {
+        var now = DateTimeOffset.Now;
+        var userBlock = new ContentBlock
+        {
+            Time = now,
+            ContentType = ContentType.TEXT,
+            Role = ChatRole.USER,
+            Content = new ContentText { Text = request },
+        };
+
+        thread.Blocks.Add(userBlock);
+        return now;
+    }
+
+    /// <summary>
+    /// Appends an AI text block to the thread and lets the configured provider fill it.
+    /// Does nothing (the thread stays unchanged) when <see cref="ProviderSettings"/> is null.
+    /// </summary>
+    /// <param name="thread">The thread that is sent to the provider and receives the response block.</param>
+    /// <param name="time">The timestamp to store on the response block.</param>
+    protected async Task AddAIResponseAsync(ChatThread thread, DateTimeOffset time)
+    {
+        if(this.ProviderSettings is null)
+            return;
+
+        var selectedProvider = this.ProviderSettings.Value;
+
+        // The text starts out flagged as waiting for the remote side,
+        // because its content arrives as a stream:
+        var responseText = new ContentText { InitialRemoteWait = true };
+
+        thread.Blocks.Add(new ContentBlock
+        {
+            Time = time,
+            ContentType = ContentType.TEXT,
+            Role = ChatRole.AI,
+            Content = responseText,
+        });
+
+        // Ask the selected provider for the response. Awaiting here
+        // means we wait until the whole content has been streamed.
+        await responseText.CreateFromProviderAsync(selectedProvider.CreateProvider(), this.JsRuntime, this.SettingsManager, selectedProvider.Model, thread);
+    }
+}
--- a/Studio/Agents/AgentTextContentCleaner.cs
+++ b/Studio/Agents/AgentTextContentCleaner.cs
@ -0,0 +1,94 @@
+using AIStudio.Chat;
+using AIStudio.Settings;
+using AIStudio.Tools;
+
+namespace AIStudio.Agents;
+
+/// <summary>
+/// System agent that sends a Markdown document to the configured provider and asks it
+/// to return only the document's main content (see <see cref="JobDescription"/>).
+/// The source URL of the document must be supplied as additional data under the
+/// key <c>sourceURL</c>.
+/// </summary>
+public sealed class AgentTextContentCleaner(SettingsManager settingsManager, IJSRuntime jsRuntime, ThreadSafeRandom rng) : AgentBase(settingsManager, jsRuntime, rng)
+{
+    /// <summary>
+    /// Returned whenever the input cannot be processed.
+    /// </summary>
+    // NOTE(review): this is one shared instance whose timestamp is fixed when the type
+    // is initialized; ProcessContext appends it to caller threads. Confirm that
+    // ContentBlock is never mutated by consumers.
+    private static readonly ContentBlock EMPTY_BLOCK = new()
+    {
+        Content = null,
+        ContentType = ContentType.NONE,
+        Role = ChatRole.AGENT,
+        Time = DateTimeOffset.UtcNow,
+    };
+    
+    // Snapshot of the thread handed to the most recent ProcessContext call:
+    private readonly List<ContentBlock> context = new();
+    
+    // Every provider answer produced by ProcessInput, in order:
+    private readonly List<ContentBlock> answers = new();
+    
+    #region Overrides of AgentBase
+
+    /// <inheritdoc />
+    public override Settings.Provider? ProviderSettings { get; set; }
+
+    /// <inheritdoc />
+    protected override Type Type => Type.SYSTEM;
+
+    /// <inheritdoc />
+    public override string Id => "Text Content Cleaner";
+
+    /// <inheritdoc />
+    protected override string JobDescription => 
+        """
+        You receive a Markdown document as input. Your goal is to identify the main content of the document
+        and return it including Markdown formatting. Remove areas that do not belong to the main part of the
+        document. For a blog article, return only the text of the article with its formatting. For a scientific
+        paper, only the contents of the paper. Delete elements of navigation, advertisements, HTML artifacts,
+        cookie banners, etc. If the content contains images, these images remain. The same applies to links.
+        Ensure that links and images are present as valid Markdown:
+        
+        - Syntax of links: [link text](URL)
+        - Syntax of images: ![alt text](URL)
+        
+        If you find relative links or images with relative paths, correct them to absolute paths. For this
+        purpose, here is the source URL:
+        """;
+
+    /// <summary>
+    /// Builds the system prompt: the job description followed by the source URL in backticks.
+    /// </summary>
+    /// <param name="additionalData">The source URL of the document.</param>
+    protected override string SystemPrompt(string additionalData) => $"{this.JobDescription} `{additionalData}`.";
+    
+    /// <inheritdoc />
+    public override async Task<ChatThread> ProcessContext(ChatThread chatThread, IDictionary<string, string> additionalData)
+    {
+        // We process the last block of the chat thread. Then, we add the result
+        // to the chat thread as the last block:
+        var answer = await this.ProcessInput(chatThread.Blocks[^1], additionalData);
+        chatThread.Blocks.Add(answer);
+        
+        // Remember the thread's blocks (including the answer) as this agent's context:
+        this.context.Clear();
+        this.context.AddRange(chatThread.Blocks);
+        
+        return chatThread;
+    }
+
+    /// <inheritdoc />
+    public override async Task<ContentBlock> ProcessInput(ContentBlock input, IDictionary<string, string> additionalData)
+    {
+        // Only text content can be cleaned:
+        if (input.Content is not ContentText text)
+            return EMPTY_BLOCK;
+        
+        // The text must be complete, i.e., not waiting for or receiving a stream:
+        if(text.InitialRemoteWait || text.IsStreaming)
+            return EMPTY_BLOCK;
+        
+        if(string.IsNullOrWhiteSpace(text.Text))
+            return EMPTY_BLOCK;
+        
+        // The source URL is required to build the system prompt:
+        if(!additionalData.TryGetValue("sourceURL", out var sourceURL) || string.IsNullOrWhiteSpace(sourceURL))
+            return EMPTY_BLOCK;
+        
+        // Without a provider, AddAIResponseAsync returns without appending a block.
+        // The last block of the thread would then be the user's request itself, and
+        // we would hand the unprocessed input back as the agent's answer:
+        if(this.ProviderSettings is null)
+            return EMPTY_BLOCK;
+        
+        var thread = this.CreateChatThread(this.SystemPrompt(sourceURL));
+        var time = this.AddUserRequest(thread, text.Text);
+        await this.AddAIResponseAsync(thread, time);
+        
+        // The AI response was appended last:
+        var answer = thread.Blocks[^1];
+        this.answers.Add(answer);
+        return answer;
+    }
+
+    /// <inheritdoc />
+    public override Task<bool> MadeDecision(ContentBlock input) => Task.FromResult(true);
+
+    /// <inheritdoc />
+    public override IReadOnlyCollection<ContentBlock> GetContext() => this.context;
+
+    /// <inheritdoc />
+    public override IReadOnlyCollection<ContentBlock> GetAnswers() => this.answers;
+
+    #endregion
+}
--- a/Studio/Agents/IAgent.cs
+++ b/Studio/Agents/IAgent.cs
@ -0,0 +1,55 @@
+using AIStudio.Chat;
+
+namespace AIStudio.Agents;
+
+/// <summary>
+/// Contract for an agent: it can process a whole chat thread or a single content block,
+/// report whether it made a decision, and expose the context and answers it holds.
+/// </summary>
+public interface IAgent
+{
+    /// <summary>
+    /// Gets the name of the agent.
+    /// </summary>
+    public string Id { get; }
+
+    /// <summary>
+    /// The provider to use for this agent. May be null, i.e., no provider is assigned.
+    /// </summary>
+    public AIStudio.Settings.Provider? ProviderSettings { get; set; }
+
+    /// <summary>
+    /// Processes a chat thread (i.e., context) and returns the updated thread.
+    /// </summary>
+    /// <param name="chatThread">The chat thread to process. The thread is the context for the agent.</param>
+    /// <param name="additionalData">Additional key/value data to use for processing the chat thread.</param>
+    /// <returns>The updated chat thread. The last content block of the thread is the agent's response.</returns>
+    public Task<ChatThread> ProcessContext(ChatThread chatThread, IDictionary<string, string> additionalData);
+
+    /// <summary>
+    /// Processes the input content block and returns the agent's response.
+    /// </summary>
+    /// <param name="input">The content block to process. It represents the input.</param>
+    /// <param name="additionalData">Additional key/value data to use for processing the input.</param>
+    /// <returns>The content block representing the agent's response.</returns>
+    public Task<ContentBlock> ProcessInput(ContentBlock input, IDictionary<string, string> additionalData);
+
+    /// <summary>
+    /// The agent makes a decision based on the input.
+    /// </summary>
+    /// <param name="input">The content block to process. Should be a question or a request.</param>
+    /// <returns>
+    /// True if a decision has been made based on the input, false otherwise.
+    /// </returns>
+    public Task<bool> MadeDecision(ContentBlock input);
+
+    /// <summary>
+    /// Retrieves the context of the agent.
+    /// </summary>
+    /// <returns>The collection of content blocks representing the agent's context. This includes the user's and the other agents' messages.</returns>
+    public IReadOnlyCollection<ContentBlock> GetContext();
+
+    /// <summary>
+    /// Retrieves the answers from the agent's context.
+    /// </summary>
+    /// <returns>
+    /// The collection of content blocks representing the answers provided by this agent.
+    /// </returns>
+    public IReadOnlyCollection<ContentBlock> GetAnswers();
+}
--- a/Studio/Agents/Type.cs
+++ b/Studio/Agents/Type.cs
@ -0,0 +1,27 @@
+namespace AIStudio.Agents;
+
+/// <summary>
+/// The kinds of agents. Note that inside this namespace the name shadows <see cref="System.Type"/>.
+/// </summary>
+public enum Type
+{
+    /// <summary>
+    /// Represents an unspecified agent type. This is the enum's default value (0).
+    /// </summary>
+    UNSPECIFIED = 0,
+    
+    /// <summary>
+    /// Represents a conversational agent that produces human-like responses and feedback (depending on the context and its job description).
+    /// For example, an expert agent for a specific domain. Answers might be detailed and informative.
+    /// </summary>
+    CONVERSATIONAL,
+
+    /// <summary>
+    /// Represents a worker agent that performs tasks and provides information or services (depending on the context and its job description).
+    /// For example, a quality assurance agent that assesses the quality of a product or service. Answers might be short and concise.
+    /// </summary>
+    WORKER,
+
+    /// <summary>
+    /// Represents a system agent that processes the input and provides a specific response (depending on the context and its job description).
+    /// For example, an HTML content agent that processes arbitrary HTML content and provides a structured Markdown response. Answers might be structured and formatted.
+    /// </summary>
+    SYSTEM,
+}
--- a/Studio/Chat/ChatRole.cs
+++ b/Studio/Chat/ChatRole.cs
@ -11,6 +11,7 @@ public enum ChatRole
    SYSTEM,
    USER,
    AI,
+    AGENT,
 }

 /// <summary>
--- a/Studio/Program.cs
+++ b/Studio/Program.cs
@ -1,4 +1,5 @@
 using AIStudio;
+using AIStudio.Agents;
 using AIStudio.Components;
 using AIStudio.Settings;
 using AIStudio.Tools;
@ -30,6 +31,7 @@ builder.Services.AddSingleton<Rust>();
 builder.Services.AddMudMarkdownClipboardService<MarkdownClipboardService>();
 builder.Services.AddSingleton<SettingsManager>();
 builder.Services.AddSingleton<ThreadSafeRandom>();
+builder.Services.AddTransient<AgentTextContentCleaner>();
 builder.Services.AddHostedService<UpdateService>();
 builder.Services.AddHostedService<TemporaryChatService>();
 builder.Services.AddRazorComponents()
--- a/Studio/Provider/Anthropic/ProviderAnthropic.cs
+++ b/Studio/Provider/Anthropic/ProviderAnthropic.cs
@ -41,6 +41,7 @@ public sealed class ProviderAnthropic() : BaseProvider("https://api.anthropic.co
                {
                    ChatRole.USER => "user",
                    ChatRole.AI => "assistant",
+                    ChatRole.AGENT => "assistant",

                    _ => "user",
                },
--- a/Studio/Provider/Fireworks/ProviderFireworks.cs
+++ b/Studio/Provider/Fireworks/ProviderFireworks.cs
@ -52,6 +52,7 @@ public class ProviderFireworks() : BaseProvider("https://api.fireworks.ai/infere
                {
                    ChatRole.USER => "user",
                    ChatRole.AI => "assistant",
+                    ChatRole.AGENT => "assistant",
                    ChatRole.SYSTEM => "system",

                    _ => "user",
--- a/Studio/Provider/Mistral/ProviderMistral.cs
+++ b/Studio/Provider/Mistral/ProviderMistral.cs
@ -51,6 +51,7 @@ public sealed class ProviderMistral() : BaseProvider("https://api.mistral.ai/v1/
                {
                    ChatRole.USER => "user",
                    ChatRole.AI => "assistant",
+                    ChatRole.AGENT => "assistant",
                    ChatRole.SYSTEM => "system",

                    _ => "user",
--- a/Studio/Provider/OpenAI/ProviderOpenAI.cs
+++ b/Studio/Provider/OpenAI/ProviderOpenAI.cs
@ -55,6 +55,7 @@ public sealed class ProviderOpenAI() : BaseProvider("https://api.openai.com/v1/"
                {
                    ChatRole.USER => "user",
                    ChatRole.AI => "assistant",
+                    ChatRole.AGENT => "assistant",
                    ChatRole.SYSTEM => "system",

                    _ => "user",
--- a/Studio/Provider/SelfHosted/ProviderSelfHosted.cs
+++ b/Studio/Provider/SelfHosted/ProviderSelfHosted.cs
@ -44,6 +44,7 @@ public sealed class ProviderSelfHosted(Settings.Provider provider) : BaseProvide
                {
                    ChatRole.USER => "user",
                    ChatRole.AI => "assistant",
+                    ChatRole.AGENT => "assistant",
                    ChatRole.SYSTEM => "system",

                    _ => "user",
--- a/Studio/wwwroot/changelog/v0.8.6.md
+++ b/Studio/wwwroot/changelog/v0.8.6.md
@ -1,5 +1,8 @@
 # v0.8.6, build 168
 - Added possibility to configure a default provider for chats
+- Added architecture for future agent usage
+- Added a first agent to read, analyze and extract text from Markdown data
 - Improved the readability of the `settings.json` file by using indentation and enum names instead of numbers
 - Improved assistant overview; assistants will now wrap to the next line if there are too many to fit on the row
- Increased the default value for the live translation delay from 1,000 to 1,500 ms
+- Increased the default value for the live translation delay from 1,000 to 1,500 ms
+- Fixed random number generator usage to be thread-safe