Added agent architecture & first agent

2025-07-12 07:22:56 +00:00 · 2024-07-30 20:35:54 +02:00 · 2024-07-30 20:35:54 +02:00 · 9b4a2c1d49
commit 9b4a2c1d49
parent 6065728128
12 changed files with 297 additions and 1 deletions
--- a/Studio/Agents/AgentBase.cs
+++ b/Studio/Agents/AgentBase.cs
@ -0,0 +1,109 @@
 using AIStudio.Chat;
 using AIStudio.Provider;
 using AIStudio.Settings;
 using AIStudio.Tools;
 // ReSharper disable MemberCanBePrivate.Global
 namespace AIStudio.Agents;
 /// <summary>
 /// Common base for agents. Keeps the injected services available to derived
 /// classes and offers helpers to create a chat thread, append a user request,
 /// and append the provider's response to that thread.
 /// </summary>
 public abstract class AgentBase(SettingsManager settingsManager, IJSRuntime jsRuntime, ThreadSafeRandom rng) : IAgent
 {
    /// <summary>
    /// The settings manager received through the constructor. It is passed on
    /// when a provider response is requested.
    /// </summary>
    protected SettingsManager SettingsManager { get; init; } = settingsManager;

    /// <summary>
    /// The JS runtime received through the constructor. It is passed on
    /// when a provider response is requested.
    /// </summary>
    protected IJSRuntime JsRuntime { get; init; } = jsRuntime;

    /// <summary>
    /// The random number generator received through the constructor.
    /// Used to draw the seed of newly created chat threads.
    /// </summary>
    protected ThreadSafeRandom RNG { get; init; } = rng;

    /// <summary>
    /// The type or category this agent belongs to.
    /// </summary>
    protected abstract Type Type { get; }

    /// <summary>
    /// The agent's name.
    /// </summary>
    public abstract string Id { get; }

    /// <summary>
    /// Describes the agent's job. Intended to be used for the system prompt.
    /// </summary>
    protected abstract string JobDescription { get; }

    /// <summary>
    /// Builds the system prompt for the agent.
    /// </summary>
    /// <param name="additionalData">Additional data the derived agent may embed into the prompt.</param>
    /// <returns>The system prompt text.</returns>
    protected abstract string SystemPrompt(string additionalData);

    #region Implementation of IAgent

    /// <inheritdoc />
    public abstract AIStudio.Settings.Provider? ProviderSettings { get; set; }

    /// <inheritdoc />
    public abstract Task<ChatThread> ProcessContext(ChatThread chatThread, IDictionary<string, string> additionalData);

    /// <inheritdoc />
    public abstract Task<ContentBlock> ProcessInput(ContentBlock input, IDictionary<string, string> additionalData);

    /// <inheritdoc />
    public abstract Task<bool> MadeDecision(ContentBlock input);

    /// <inheritdoc />
    public abstract IReadOnlyCollection<ContentBlock> GetContext();

    /// <inheritdoc />
    public abstract IReadOnlyCollection<ContentBlock> GetAnswers();

    #endregion

    /// <summary>
    /// Creates a new chat thread without blocks. The thread gets an empty
    /// workspace id, a fresh chat id, an empty name, and a seed drawn from <see cref="RNG"/>.
    /// </summary>
    /// <param name="systemPrompt">The system prompt to store in the thread.</param>
    /// <returns>The newly created chat thread.</returns>
    protected ChatThread CreateChatThread(string systemPrompt)
    {
        var freshThread = new ChatThread
        {
            WorkspaceId = Guid.Empty,
            ChatId = Guid.NewGuid(),
            Name = string.Empty,
            Seed = this.RNG.Next(),
            SystemPrompt = systemPrompt,
            Blocks = [],
        };

        return freshThread;
    }

    /// <summary>
    /// Appends a text block with the user role to the given thread.
    /// </summary>
    /// <param name="thread">The thread receiving the new block.</param>
    /// <param name="request">The text of the user's request.</param>
    /// <returns>The timestamp (local time, taken at the call) stored in the added block.</returns>
    protected DateTimeOffset AddUserRequest(ChatThread thread, string request)
    {
        var requestTime = DateTimeOffset.Now;
        var userBlock = new ContentBlock
        {
            Time = requestTime,
            ContentType = ContentType.TEXT,
            Role = ChatRole.USER,
            Content = new ContentText
            {
                Text = request,
            },
        };

        thread.Blocks.Add(userBlock);
        return requestTime;
    }

    /// <summary>
    /// Appends a text block with the AI role to the given thread and lets the
    /// configured provider fill it. Does nothing when no provider is configured.
    /// </summary>
    /// <param name="thread">The thread receiving the response block; it is also handed to the provider call.</param>
    /// <param name="time">The timestamp to store in the response block.</param>
    protected async Task AddAIResponseAsync(ChatThread thread, DateTimeOffset time)
    {
        // No provider configured: leave the thread untouched.
        if (this.ProviderSettings is null)
        {
            return;
        }

        var settings = this.ProviderSettings.Value;

        // The text starts in the "waiting for the remote" state,
        // because the content stream has not begun yet:
        var pendingText = new ContentText
        {
            InitialRemoteWait = true,
        };

        // The block is part of the thread before the provider is asked:
        thread.Blocks.Add(new ContentBlock
        {
            Time = time,
            ContentType = ContentType.TEXT,
            Role = ChatRole.AI,
            Content = pendingText,
        });

        // Ask the selected provider for the response. Awaiting this call
        // means waiting until the whole content has been streamed.
        var provider = settings.CreateProvider();
        await pendingText.CreateFromProviderAsync(provider, this.JsRuntime, this.SettingsManager, settings.Model, thread);
    }
 }
--- a/Studio/Agents/AgentTextContentCleaner.cs
+++ b/Studio/Agents/AgentTextContentCleaner.cs
@ -0,0 +1,94 @@
 using AIStudio.Chat;
 using AIStudio.Settings;
 using AIStudio.Tools;
 namespace AIStudio.Agents;
 /// <summary>
 /// System agent that hands a Markdown document to the configured provider,
 /// together with a job description asking for the document's main content
 /// and the source URL for resolving relative links.
 /// </summary>
 public sealed class AgentTextContentCleaner(SettingsManager settingsManager, IJSRuntime jsRuntime, ThreadSafeRandom rng) : AgentBase(settingsManager, jsRuntime, rng)
 {
    /// <summary>
    /// Placeholder returned whenever the input cannot be processed. It is a single
    /// shared instance; its time is captured once, when the type is initialized.
    /// </summary>
    private static readonly ContentBlock EMPTY_BLOCK = new()
    {
        Content = null,
        ContentType = ContentType.NONE,
        Role = ChatRole.AGENT,
        Time = DateTimeOffset.UtcNow,
    };

    // Snapshot of the blocks of the most recently processed chat thread.
    private readonly List<ContentBlock> context = new();

    // All answers produced by the provider through ProcessInput.
    private readonly List<ContentBlock> answers = new();

    #region Overrides of AgentBase

    /// <inheritdoc />
    public override Settings.Provider? ProviderSettings { get; set; }

    /// <inheritdoc />
    protected override Type Type => Type.SYSTEM;

    /// <inheritdoc />
    public override string Id => "Text Content Cleaner";

    /// <inheritdoc />
    protected override string JobDescription => 
        """
        You receive a Markdown document as input. Your goal is to identify the main content of the document
        and return it including Markdown formatting. Remove areas that do not belong to the main part of the
        document. For a blog article, return only the text of the article with its formatting. For a scientific
        paper, only the contents of the paper. Delete elements of navigation, advertisements, HTML artifacts,
        cookie banners, etc. If the content contains images, these images remain. The same applies to links.
        Ensure that links and images are present as valid Markdown:
        - Syntax of links: [link text](URL)
        - Syntax of images: ![alt text](URL)
        If you find relative links or images with relative paths, correct them to absolute paths. For this
        purpose, here is the source URL:
        """;

    /// <summary>
    /// Builds the system prompt: the job description followed by the given data in backticks.
    /// </summary>
    /// <param name="additionalData">The source URL of the document.</param>
    /// <returns>The system prompt text.</returns>
    protected override string SystemPrompt(string additionalData) => $"{this.JobDescription} `{additionalData}`.";

    /// <inheritdoc />
    public override async Task<ChatThread> ProcessContext(ChatThread chatThread, IDictionary<string, string> additionalData)
    {
        // We process the last block of the chat thread. Then, we add the result
        // to the chat thread as the last block. An empty thread has no last block
        // (indexing with ^1 would throw), so it is answered with the empty block,
        // like any other input that cannot be processed:
        var answer = chatThread.Blocks.Count > 0
            ? await this.ProcessInput(chatThread.Blocks[^1], additionalData)
            : EMPTY_BLOCK;

        chatThread.Blocks.Add(answer);

        // Replace the stored context with the blocks of this thread:
        this.context.Clear();
        this.context.AddRange(chatThread.Blocks);
        return chatThread;
    }

    /// <inheritdoc />
    public override async Task<ContentBlock> ProcessInput(ContentBlock input, IDictionary<string, string> additionalData)
    {
        // Only text content can be cleaned:
        if (input.Content is not ContentText text)
            return EMPTY_BLOCK;

        // The text must be complete, i.e., neither waiting for the remote nor streaming:
        if(text.InitialRemoteWait || text.IsStreaming)
            return EMPTY_BLOCK;

        if(string.IsNullOrWhiteSpace(text.Text))
            return EMPTY_BLOCK;

        // The source URL is required for the system prompt:
        if(!additionalData.TryGetValue("sourceURL", out var sourceURL) || string.IsNullOrWhiteSpace(sourceURL))
            return EMPTY_BLOCK;

        // Without a provider, AddAIResponseAsync adds nothing to the thread. The last
        // block would then be the user's request, which must not be reported as answer:
        if(this.ProviderSettings is null)
            return EMPTY_BLOCK;

        var thread = this.CreateChatThread(this.SystemPrompt(sourceURL));
        var time = this.AddUserRequest(thread, text.Text);
        await this.AddAIResponseAsync(thread, time);

        // The response block was added last:
        var answer = thread.Blocks[^1];
        this.answers.Add(answer);
        return answer;
    }

    /// <inheritdoc />
    public override Task<bool> MadeDecision(ContentBlock input) => Task.FromResult(true);

    /// <inheritdoc />
    public override IReadOnlyCollection<ContentBlock> GetContext() => this.context;

    /// <inheritdoc />
    public override IReadOnlyCollection<ContentBlock> GetAnswers() => this.answers;

    #endregion
 }
--- a/Studio/Agents/IAgent.cs
+++ b/Studio/Agents/IAgent.cs
@ -0,0 +1,55 @@
 using AIStudio.Chat;
 namespace AIStudio.Agents;
 /// <summary>
 /// The contract of an agent: it has a name, can be assigned a provider,
 /// processes chat threads or single content blocks, and exposes its
 /// context and the answers it has given.
 /// </summary>
 public interface IAgent
 {
    /// <summary>
    /// The agent's name.
    /// </summary>
    string Id { get; }

    /// <summary>
    /// The provider this agent uses. May be unset.
    /// </summary>
    AIStudio.Settings.Provider? ProviderSettings { get; set; }

    /// <summary>
    /// Lets the agent work on a chat thread, which serves as its context.
    /// </summary>
    /// <param name="chatThread">The chat thread, i.e., the context, to process.</param>
    /// <param name="additionalData">Further data the agent may use while processing the thread.</param>
    /// <returns>The updated chat thread; its last content block is the agent's response.</returns>
    Task<ChatThread> ProcessContext(ChatThread chatThread, IDictionary<string, string> additionalData);

    /// <summary>
    /// Lets the agent work on a single content block.
    /// </summary>
    /// <param name="input">The content block serving as input.</param>
    /// <param name="additionalData">Further data the agent may use while processing the input.</param>
    /// <returns>The content block holding the agent's response.</returns>
    Task<ContentBlock> ProcessInput(ContentBlock input, IDictionary<string, string> additionalData);

    /// <summary>
    /// Asks the agent to decide based on the input.
    /// </summary>
    /// <param name="input">The content block to decide on; it should be a question or a request.</param>
    /// <returns>
    /// True when the agent made a decision based on the input; otherwise, false.
    /// </returns>
    Task<bool> MadeDecision(ContentBlock input);

    /// <summary>
    /// Gets the agent's context.
    /// </summary>
    /// <returns>The content blocks forming the agent's context, including the messages of the user and of the other agent.</returns>
    IReadOnlyCollection<ContentBlock> GetContext();

    /// <summary>
    /// Gets the answers given by this agent.
    /// </summary>
    /// <returns>
    /// The content blocks holding the answers this agent provided.
    /// </returns>
    IReadOnlyCollection<ContentBlock> GetAnswers();
 }
--- a/Studio/Agents/Type.cs
+++ b/Studio/Agents/Type.cs
@ -0,0 +1,27 @@
 namespace AIStudio.Agents;
 /// <summary>
 /// The kinds of agents.
 /// </summary>
 public enum Type
 {
    /// <summary>
    /// The agent type is not specified. This is the default value.
    /// </summary>
    UNSPECIFIED = 0,

    /// <summary>
    /// A conversational agent producing human-like responses and feedback, depending on the context and its job description.
    /// Example: an expert agent for a specific domain. Its answers might be detailed and informative.
    /// </summary>
    CONVERSATIONAL = 1,

    /// <summary>
    /// A worker agent performing tasks and providing information or services, depending on the context and its job description.
    /// Example: a quality assurance agent assessing the quality of a product or service. Its answers might be short and concise.
    /// </summary>
    WORKER = 2,

    /// <summary>
    /// A system agent processing the input and providing a specific response, depending on the context and its job description.
    /// Example: an HTML content agent turning arbitrary HTML content into a structured Markdown response. Its answers might be structured and formatted.
    /// </summary>
    SYSTEM = 3,
 }
--- a/Studio/Chat/ChatRole.cs
+++ b/Studio/Chat/ChatRole.cs
@ -11,6 +11,7 @@ public enum ChatRole
    SYSTEM,
    USER,
    AI,
    AGENT,
 }
 /// <summary>
--- a/Studio/Program.cs
+++ b/Studio/Program.cs
@ -1,4 +1,5 @@
 using AIStudio;
 using AIStudio.Agents;
 using AIStudio.Components;
 using AIStudio.Settings;
 using AIStudio.Tools;
@ -30,6 +31,7 @@ builder.Services.AddSingleton<Rust>();
 builder.Services.AddMudMarkdownClipboardService<MarkdownClipboardService>();
 builder.Services.AddSingleton<SettingsManager>();
 builder.Services.AddSingleton<ThreadSafeRandom>();
 builder.Services.AddTransient<AgentTextContentCleaner>();
 builder.Services.AddHostedService<UpdateService>();
 builder.Services.AddHostedService<TemporaryChatService>();
 builder.Services.AddRazorComponents()
--- a/Studio/Provider/Anthropic/ProviderAnthropic.cs
+++ b/Studio/Provider/Anthropic/ProviderAnthropic.cs
@ -41,6 +41,7 @@ public sealed class ProviderAnthropic() : BaseProvider("https://api.anthropic.co
                {
                    ChatRole.USER => "user",
                    ChatRole.AI => "assistant",
                    ChatRole.AGENT => "assistant",
                    _ => "user",
                },
--- a/Studio/Provider/Fireworks/ProviderFireworks.cs
+++ b/Studio/Provider/Fireworks/ProviderFireworks.cs
@ -52,6 +52,7 @@ public class ProviderFireworks() : BaseProvider("https://api.fireworks.ai/infere
                {
                    ChatRole.USER => "user",
                    ChatRole.AI => "assistant",
                    ChatRole.AGENT => "assistant",
                    ChatRole.SYSTEM => "system",
                    _ => "user",
--- a/Studio/Provider/Mistral/ProviderMistral.cs
+++ b/Studio/Provider/Mistral/ProviderMistral.cs
@ -51,6 +51,7 @@ public sealed class ProviderMistral() : BaseProvider("https://api.mistral.ai/v1/
                {
                    ChatRole.USER => "user",
                    ChatRole.AI => "assistant",
                    ChatRole.AGENT => "assistant",
                    ChatRole.SYSTEM => "system",
                    _ => "user",
--- a/Studio/Provider/OpenAI/ProviderOpenAI.cs
+++ b/Studio/Provider/OpenAI/ProviderOpenAI.cs
@ -55,6 +55,7 @@ public sealed class ProviderOpenAI() : BaseProvider("https://api.openai.com/v1/"
                {
                    ChatRole.USER => "user",
                    ChatRole.AI => "assistant",
                    ChatRole.AGENT => "assistant",
                    ChatRole.SYSTEM => "system",
                    _ => "user",
--- a/Studio/Provider/SelfHosted/ProviderSelfHosted.cs
+++ b/Studio/Provider/SelfHosted/ProviderSelfHosted.cs
@ -44,6 +44,7 @@ public sealed class ProviderSelfHosted(Settings.Provider provider) : BaseProvide
                {
                    ChatRole.USER => "user",
                    ChatRole.AI => "assistant",
                    ChatRole.AGENT => "assistant",
                    ChatRole.SYSTEM => "system",
                    _ => "user",
--- a/Studio/wwwroot/changelog/v0.8.6.md
+++ b/Studio/wwwroot/changelog/v0.8.6.md
@ -1,5 +1,8 @@
 # v0.8.6, build 168
 - Added the possibility to configure a default provider for chats
 - Added architecture for future agent usage
 - Added a first agent to read, analyze and extract text from Markdown data
 - Improved the readability of the `settings.json` file by using indentation and enum names instead of numbers
 - Improved assistant overview; assistants will now wrap to the next line if there are too many to fit on the row
 - Increased the default value for the live translation delay from 1,000 to 1,500 ms
 - Fixed random number generator usage to be thread-safe