From 954cf449397c6afda1351e00d47131e4fef9a7d5 Mon Sep 17 00:00:00 2001 From: Thorsten Sommer Date: Mon, 10 Feb 2025 18:49:22 +0100 Subject: [PATCH] Added retrieval integration, part I (#281) --- README.md | 2 +- app/MindWork AI Studio/Agents/AgentBase.cs | 14 +- .../Assistants/AssistantBase.razor.cs | 13 +- .../Chat/ContentBlockComponent.razor | 4 +- app/MindWork AI Studio/Chat/ContentImage.cs | 13 +- .../Chat/ContentImageSource.cs | 8 + app/MindWork AI Studio/Chat/ContentText.cs | 14 +- app/MindWork AI Studio/Chat/IContent.cs | 2 +- .../Components/ChatComponent.razor.cs | 15 +- app/MindWork AI Studio/Pages/Writer.razor.cs | 14 +- .../Tools/RAG/IRetrievalContext.cs | 45 ++++++ .../Tools/RAG/RetrievalContentCategory.cs | 12 ++ .../RAG/RetrievalContentCategoryExtensions.cs | 24 +++ .../Tools/RAG/RetrievalContentType.cs | 105 +++++++++++++ .../RAG/RetrievalContentTypeExtensions.cs | 141 ++++++++++++++++++ .../Tools/RAG/RetrievalImageContext.cs | 38 +++++ .../Tools/RAG/RetrievalTextContext.cs | 43 ++++++ .../wwwroot/changelog/v0.9.29.md | 1 + 18 files changed, 477 insertions(+), 31 deletions(-) create mode 100644 app/MindWork AI Studio/Chat/ContentImageSource.cs create mode 100644 app/MindWork AI Studio/Tools/RAG/IRetrievalContext.cs create mode 100644 app/MindWork AI Studio/Tools/RAG/RetrievalContentCategory.cs create mode 100644 app/MindWork AI Studio/Tools/RAG/RetrievalContentCategoryExtensions.cs create mode 100644 app/MindWork AI Studio/Tools/RAG/RetrievalContentType.cs create mode 100644 app/MindWork AI Studio/Tools/RAG/RetrievalContentTypeExtensions.cs create mode 100644 app/MindWork AI Studio/Tools/RAG/RetrievalImageContext.cs create mode 100644 app/MindWork AI Studio/Tools/RAG/RetrievalTextContext.cs create mode 100644 app/MindWork AI Studio/wwwroot/changelog/v0.9.29.md diff --git a/README.md b/README.md index 0ad1f28..ebd2b5e 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ Things we are currently working on: - [ ] App: Implement the process to 
vectorize one local file using embeddings - [ ] Runtime: Integration of the vector database [LanceDB](https://github.com/lancedb/lancedb) - [ ] App: Implement the continuous process of vectorizing data - - [ ] App: Define a common retrieval context interface for the integration of RAG processes in chats + - [x] ~~App: Define a common retrieval context interface for the integration of RAG processes in chats (PR [#281](https://github.com/MindWorkAI/AI-Studio/pull/281))~~ - [ ] App: Define a common augmentation interface for the integration of RAG processes in chats - [ ] App: Integrate data sources in chats diff --git a/app/MindWork AI Studio/Agents/AgentBase.cs b/app/MindWork AI Studio/Agents/AgentBase.cs index 8028a86..639fa99 100644 --- a/app/MindWork AI Studio/Agents/AgentBase.cs +++ b/app/MindWork AI Studio/Agents/AgentBase.cs @@ -13,6 +13,8 @@ public abstract class AgentBase(ILogger logger, SettingsManager setti protected ThreadSafeRandom RNG { get; init; } = rng; protected ILogger Logger { get; init; } = logger; + + protected IContent? lastUserPrompt; /// /// Represents the type or category of this agent. @@ -63,15 +65,17 @@ public abstract class AgentBase(ILogger logger, SettingsManager setti protected DateTimeOffset AddUserRequest(ChatThread thread, string request) { var time = DateTimeOffset.Now; + this.lastUserPrompt = new ContentText + { + Text = request, + }; + thread.Blocks.Add(new ContentBlock { Time = time, ContentType = ContentType.TEXT, Role = ChatRole.USER, - Content = new ContentText - { - Text = request, - }, + Content = this.lastUserPrompt, }); return time; @@ -103,6 +107,6 @@ public abstract class AgentBase(ILogger logger, SettingsManager setti // Use the selected provider to get the AI response. // By awaiting this line, we wait for the entire // content to be streamed. 
- await aiText.CreateFromProviderAsync(providerSettings.CreateProvider(this.Logger), this.SettingsManager, providerSettings.Model, thread); + await aiText.CreateFromProviderAsync(providerSettings.CreateProvider(this.Logger), this.SettingsManager, providerSettings.Model, this.lastUserPrompt, thread); } } \ No newline at end of file diff --git a/app/MindWork AI Studio/Assistants/AssistantBase.razor.cs b/app/MindWork AI Studio/Assistants/AssistantBase.razor.cs index 8e5b60d..63867ff 100644 --- a/app/MindWork AI Studio/Assistants/AssistantBase.razor.cs +++ b/app/MindWork AI Studio/Assistants/AssistantBase.razor.cs @@ -97,6 +97,7 @@ public abstract partial class AssistantBase : ComponentBase, IMessageBusReceiver protected bool inputIsValid; protected Profile currentProfile = Profile.NO_PROFILE; protected ChatThread? chatThread; + protected IContent? lastUserPrompt; private readonly Timer formChangeTimer = new(TimeSpan.FromSeconds(1.6)); @@ -242,16 +243,18 @@ public abstract partial class AssistantBase : ComponentBase, IMessageBusReceiver protected DateTimeOffset AddUserRequest(string request, bool hideContentFromUser = false) { var time = DateTimeOffset.Now; + this.lastUserPrompt = new ContentText + { + Text = request, + }; + this.chatThread!.Blocks.Add(new ContentBlock { Time = time, ContentType = ContentType.TEXT, HideFromUser = hideContentFromUser, Role = ChatRole.USER, - Content = new ContentText - { - Text = request, - }, + Content = this.lastUserPrompt, }); return time; @@ -287,7 +290,7 @@ public abstract partial class AssistantBase : ComponentBase, IMessageBusReceiver // Use the selected provider to get the AI response. // By awaiting this line, we wait for the entire // content to be streamed. 
- await aiText.CreateFromProviderAsync(this.providerSettings.CreateProvider(this.Logger), this.SettingsManager, this.providerSettings.Model, this.chatThread); + await aiText.CreateFromProviderAsync(this.providerSettings.CreateProvider(this.Logger), this.SettingsManager, this.providerSettings.Model, this.lastUserPrompt, this.chatThread); this.isProcessing = false; this.StateHasChanged(); diff --git a/app/MindWork AI Studio/Chat/ContentBlockComponent.razor b/app/MindWork AI Studio/Chat/ContentBlockComponent.razor index ca61acc..826cfdc 100644 --- a/app/MindWork AI Studio/Chat/ContentBlockComponent.razor +++ b/app/MindWork AI Studio/Chat/ContentBlockComponent.razor @@ -78,9 +78,9 @@ break; case ContentType.IMAGE: - if (this.Content is ContentImage imageContent) + if (this.Content is ContentImage { SourceType: ContentImageSource.URL or ContentImageSource.LOCAL_PATH } imageContent) { - + } break; diff --git a/app/MindWork AI Studio/Chat/ContentImage.cs b/app/MindWork AI Studio/Chat/ContentImage.cs index 314ba93..3a5fbd1 100644 --- a/app/MindWork AI Studio/Chat/ContentImage.cs +++ b/app/MindWork AI Studio/Chat/ContentImage.cs @@ -29,7 +29,7 @@ public sealed class ContentImage : IContent public Func StreamingEvent { get; set; } = () => Task.CompletedTask; /// - public Task CreateFromProviderAsync(IProvider provider, SettingsManager settings, Model chatModel, ChatThread chatChatThread, CancellationToken token = default) + public Task CreateFromProviderAsync(IProvider provider, SettingsManager settings, Model chatModel, IContent? lastPrompt, ChatThread? chatChatThread, CancellationToken token = default) { throw new NotImplementedException(); } @@ -37,12 +37,15 @@ public sealed class ContentImage : IContent #endregion /// - /// The URL of the image. + /// The type of the image source. /// - public string URL { get; set; } = string.Empty; + /// + /// Is the image source a URL, a local file path, a base64 string, etc.? 
+ /// + public required ContentImageSource SourceType { get; init; } /// - /// The local path of the image. + /// The image source. /// - public string LocalPath { get; set; } = string.Empty; + public required string Source { get; set; } } \ No newline at end of file diff --git a/app/MindWork AI Studio/Chat/ContentImageSource.cs b/app/MindWork AI Studio/Chat/ContentImageSource.cs new file mode 100644 index 0000000..ec00966 --- /dev/null +++ b/app/MindWork AI Studio/Chat/ContentImageSource.cs @@ -0,0 +1,8 @@ +namespace AIStudio.Chat; + +public enum ContentImageSource +{ + URL, + LOCAL_PATH, + BASE64, +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Chat/ContentText.cs b/app/MindWork AI Studio/Chat/ContentText.cs index f7cc62f..2061391 100644 --- a/app/MindWork AI Studio/Chat/ContentText.cs +++ b/app/MindWork AI Studio/Chat/ContentText.cs @@ -35,11 +35,23 @@ public sealed class ContentText : IContent public Func StreamingEvent { get; set; } = () => Task.CompletedTask; /// - public async Task CreateFromProviderAsync(IProvider provider, SettingsManager settings, Model chatModel, ChatThread? chatThread, CancellationToken token = default) + public async Task CreateFromProviderAsync(IProvider provider, SettingsManager settings, Model chatModel, IContent? lastPrompt, ChatThread? chatThread, CancellationToken token = default) { if(chatThread is null) return; + // + // Check if the user wants to bind any data sources to the chat: + // + + // + // Trigger the retrieval part of the (R)AG process: + // + + // + // Perform the augmentation of the R(A)G process: + // + // Store the last time we got a response. We use this later // to determine whether we should notify the UI about the // new content or not. 
Depends on the energy saving mode diff --git a/app/MindWork AI Studio/Chat/IContent.cs b/app/MindWork AI Studio/Chat/IContent.cs index 1feea52..987bada 100644 --- a/app/MindWork AI Studio/Chat/IContent.cs +++ b/app/MindWork AI Studio/Chat/IContent.cs @@ -42,5 +42,5 @@ public interface IContent /// /// Uses the provider to create the content. /// - public Task CreateFromProviderAsync(IProvider provider, SettingsManager settings, Model chatModel, ChatThread chatChatThread, CancellationToken token = default); + public Task CreateFromProviderAsync(IProvider provider, SettingsManager settings, Model chatModel, IContent? lastPrompt, ChatThread? chatChatThread, CancellationToken token = default); } \ No newline at end of file diff --git a/app/MindWork AI Studio/Components/ChatComponent.razor.cs b/app/MindWork AI Studio/Components/ChatComponent.razor.cs index 5a10625..466d210 100644 --- a/app/MindWork AI Studio/Components/ChatComponent.razor.cs +++ b/app/MindWork AI Studio/Components/ChatComponent.razor.cs @@ -295,8 +295,14 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable } var time = DateTimeOffset.Now; + IContent? lastUserPrompt; if (!reuseLastUserPrompt) { + lastUserPrompt = new ContentText + { + Text = this.userInput, + }; + // // Add the user message to the thread: // @@ -305,10 +311,7 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable Time = time, ContentType = ContentType.TEXT, Role = ChatRole.USER, - Content = new ContentText - { - Text = this.userInput, - }, + Content = lastUserPrompt, }); // Save the chat: @@ -319,6 +322,8 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable this.StateHasChanged(); } } + else + lastUserPrompt = this.ChatThread.Blocks.Last(x => x.Role is ChatRole.USER).Content; // // Add the AI response to the thread: @@ -360,7 +365,7 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable // Use the selected provider to get the AI response. 
// By awaiting this line, we wait for the entire // content to be streamed. - await aiText.CreateFromProviderAsync(this.Provider.CreateProvider(this.Logger), this.SettingsManager, this.Provider.Model, this.ChatThread, this.cancellationTokenSource.Token); + await aiText.CreateFromProviderAsync(this.Provider.CreateProvider(this.Logger), this.SettingsManager, this.Provider.Model, lastUserPrompt, this.ChatThread, this.cancellationTokenSource.Token); } this.cancellationTokenSource = null; diff --git a/app/MindWork AI Studio/Pages/Writer.razor.cs b/app/MindWork AI Studio/Pages/Writer.razor.cs index 5816074..60280d1 100644 --- a/app/MindWork AI Studio/Pages/Writer.razor.cs +++ b/app/MindWork AI Studio/Pages/Writer.razor.cs @@ -106,17 +106,19 @@ public partial class Writer : MSGComponentBase, IAsyncDisposable }; var time = DateTimeOffset.Now; + var lastUserPrompt = new ContentText + { + // We use the maximum 160 characters from the end of the text: + Text = this.userInput.Length > 160 ? this.userInput[^160..] : this.userInput, + }; + this.chatThread.Blocks.Clear(); this.chatThread.Blocks.Add(new ContentBlock { Time = time, ContentType = ContentType.TEXT, Role = ChatRole.USER, - Content = new ContentText - { - // We use the maximum 160 characters from the end of the text: - Text = this.userInput.Length > 160 ? this.userInput[^160..] 
: this.userInput, - }, + Content = lastUserPrompt, }); var aiText = new ContentText @@ -137,7 +139,7 @@ public partial class Writer : MSGComponentBase, IAsyncDisposable this.isStreaming = true; this.StateHasChanged(); - await aiText.CreateFromProviderAsync(this.providerSettings.CreateProvider(this.Logger), this.SettingsManager, this.providerSettings.Model, this.chatThread); + await aiText.CreateFromProviderAsync(this.providerSettings.CreateProvider(this.Logger), this.SettingsManager, this.providerSettings.Model, lastUserPrompt, this.chatThread); this.suggestion = aiText.Text; this.isStreaming = false; diff --git a/app/MindWork AI Studio/Tools/RAG/IRetrievalContext.cs b/app/MindWork AI Studio/Tools/RAG/IRetrievalContext.cs new file mode 100644 index 0000000..27a03df --- /dev/null +++ b/app/MindWork AI Studio/Tools/RAG/IRetrievalContext.cs @@ -0,0 +1,45 @@ +namespace AIStudio.Tools.RAG; + +/// +/// The common interface for any retrieval context. +/// +public interface IRetrievalContext +{ + /// + /// The name of the data source. + /// + /// + /// Depending on the configuration, the AI is selecting the appropriate data source. + /// In order to inform the user about where the information is coming from, the data + /// source name is necessary. + /// + public string DataSourceName { get; init; } + + /// + /// The category of the content, like e.g., text, audio, image, etc. + /// + public RetrievalContentCategory Category { get; init; } + + /// + /// What type of content is being retrieved? Like e.g., a project proposal, spreadsheet, art, etc. + /// + public RetrievalContentType Type { get; init; } + + /// + /// The path to the content, e.g., a URL, a file path, a path in a graph database, etc. + /// + public string Path { get; init; } + + /// + /// Links to related content, e.g., links to Wikipedia articles, links to sources, etc. + /// + /// + /// Why would you need links for retrieval? You are right that not all retrieval + /// contexts need links. 
But think about a web search feature, where we want to + /// query a search engine and get back a list of links to the most relevant + /// matches. Think about a continuous web crawler that is constantly looking for + /// new information and adding it to the knowledge base. In these cases, links + /// are essential. + /// + public IReadOnlyList Links { get; init; } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/RAG/RetrievalContentCategory.cs b/app/MindWork AI Studio/Tools/RAG/RetrievalContentCategory.cs new file mode 100644 index 0000000..b3ee1be --- /dev/null +++ b/app/MindWork AI Studio/Tools/RAG/RetrievalContentCategory.cs @@ -0,0 +1,12 @@ +namespace AIStudio.Tools.RAG; + +public enum RetrievalContentCategory +{ + NONE, + UNKNOWN, + + TEXT, + IMAGE, + VIDEO, + AUDIO, +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/RAG/RetrievalContentCategoryExtensions.cs b/app/MindWork AI Studio/Tools/RAG/RetrievalContentCategoryExtensions.cs new file mode 100644 index 0000000..e4dd2a7 --- /dev/null +++ b/app/MindWork AI Studio/Tools/RAG/RetrievalContentCategoryExtensions.cs @@ -0,0 +1,24 @@ +using AIStudio.Tools.ERIClient.DataModel; + +namespace AIStudio.Tools.RAG; + +public static class RetrievalContentCategoryExtensions +{ + /// + /// Converts an ERI content type to a common retrieval content category. + /// + /// The content type yielded by the ERI server. + /// The corresponding retrieval content category. 
+ public static RetrievalContentCategory ToRetrievalContentCategory(ContentType contentType) => contentType switch + { + ContentType.NONE => RetrievalContentCategory.NONE, + ContentType.UNKNOWN => RetrievalContentCategory.UNKNOWN, + ContentType.TEXT => RetrievalContentCategory.TEXT, + ContentType.IMAGE => RetrievalContentCategory.IMAGE, + ContentType.VIDEO => RetrievalContentCategory.VIDEO, + ContentType.AUDIO => RetrievalContentCategory.AUDIO, + ContentType.SPEECH => RetrievalContentCategory.AUDIO, + + _ => RetrievalContentCategory.UNKNOWN, + }; +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/RAG/RetrievalContentType.cs b/app/MindWork AI Studio/Tools/RAG/RetrievalContentType.cs new file mode 100644 index 0000000..c886611 --- /dev/null +++ b/app/MindWork AI Studio/Tools/RAG/RetrievalContentType.cs @@ -0,0 +1,105 @@ +namespace AIStudio.Tools.RAG; + +/// +/// The type of the retrieved content. +/// +public enum RetrievalContentType +{ + NOT_SPECIFIED, + UNKNOWN, + + // + // Text Content: + // + TEXT_DOCUMENT, + TEXT_ARTICLE, + TEXT_BOOK, + TEXT_CHAPTER, + TEXT_PAPER, + TEXT_THESIS, + TEXT_BUSINESS_CONCEPT, + TEXT_DICTIONARY, + TEXT_ENCYCLOPEDIA, + TEXT_GLOSSARY, + TEXT_JOURNAL, + TEXT_MAGAZINE, + TEXT_NEWSPAPER, + TEXT_REPORT, + TEXT_REVIEW, + TEXT_WEBSITE, + TEXT_IDEA, + TEXT_CONCEPT, + TEXT_DEFINITION, + TEXT_EXAMPLE, + TEXT_QUOTE, + TEXT_DRAFT, + TEXT_SCRIPT, + TEXT_TRANSCRIPT, + TEXT_SUBTITLE, + TEXT_CAPTION, + TEXT_DIALOGUE, + TEXT_PROJECT_PROPOSAL, + TEXT_PROJECT_PLAN, + TEXT_SPREADSHEET, + TEXT_PRESENTATION, + TEXT_MEETING_MINUTES, + TEXT_EMAIL, + TEXT_PROTOCOL, + + // + // Image Content: + // + IMAGE_PHOTO, + IMAGE_ILLUSTRATION, + IMAGE_DIAGRAM, + IMAGE_CHART, + IMAGE_ART, + IMAGE_DRAWING, + IMAGE_PAINTING, + IMAGE_SKETCH, + IMAGE_MAP, + IMAGE_CHARACTER, + IMAGE_SCENE, + IMAGE_LANDSCAPE, + IMAGE_PORTRAIT, + IMAGE_POSTER, + IMAGE_LOGO, + IMAGE_ICON, + IMAGE_SATELLITE_IMAGE, + + // + // Audio Content: + // + AUDIO_SPEECH, + 
AUDIO_PODCAST, + AUDIO_BOOK, + AUDIO_INTERVIEW, + AUDIO_LECTURE, + AUDIO_TALK, + AUDIO_SONG, + AUDIO_MUSIC, + AUDIO_SOUND, + AUDIO_CALL, + AUDIO_VOICE_ACTING, + AUDIO_DESCRIPTION, + AUDIO_GUIDE, + AUDIO_DIALOGUE, + + // + // Video Content: + // + VIDEO_MOVIE, + VIDEO_FILM, + VIDEO_TV_SHOW, + VIDEO_SERIES, + VIDEO_EPISODE, + VIDEO_DOCUMENTARY, + VIDEO_TUTORIAL, + VIDEO_LECTURE, + VIDEO_WEBINAR, + VIDEO_GAME, + VIDEO_ANIMATION, + VIDEO_CUTSCENE, + VIDEO_TRAILER, + VIDEO_ADVERTISEMENT, +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/RAG/RetrievalContentTypeExtensions.cs b/app/MindWork AI Studio/Tools/RAG/RetrievalContentTypeExtensions.cs new file mode 100644 index 0000000..e591bf8 --- /dev/null +++ b/app/MindWork AI Studio/Tools/RAG/RetrievalContentTypeExtensions.cs @@ -0,0 +1,141 @@ +using AIStudio.Tools.ERIClient.DataModel; + +namespace AIStudio.Tools.RAG; + +public static class RetrievalContentTypeExtensions +{ + public static RetrievalContentType ToRetrievalContentType(Context eriContext) + { + // + // Right now, we have to parse the category string along the type enum to + // determine the common retrieval content type. In future ERI versions, we + // might use the same enum. 
+        //
+        
+        var lowerCategory = eriContext.Category.ToLowerInvariant(); // lower-cased once: every pattern below must be lower-case, too
+        var type = eriContext.Type;
+        return type switch // arms are tested top-down and use substring matching: a longer keyword must precede any shorter keyword it contains
+        {
+            ContentType.TEXT when lowerCategory.Contains("book") => RetrievalContentType.TEXT_BOOK,
+            ContentType.TEXT when lowerCategory.Contains("newspaper") => RetrievalContentType.TEXT_NEWSPAPER, // must precede "paper": "newspaper" contains "paper"
+            ContentType.TEXT when lowerCategory.Contains("paper") => RetrievalContentType.TEXT_PAPER,
+            ContentType.TEXT when lowerCategory.Contains("dictionary") => RetrievalContentType.TEXT_DICTIONARY,
+            ContentType.TEXT when lowerCategory.Contains("encyclopedia") => RetrievalContentType.TEXT_ENCYCLOPEDIA,
+            ContentType.TEXT when lowerCategory.Contains("glossary") => RetrievalContentType.TEXT_GLOSSARY,
+            ContentType.TEXT when lowerCategory.Contains("journal") => RetrievalContentType.TEXT_JOURNAL,
+            ContentType.TEXT when lowerCategory.Contains("magazine") => RetrievalContentType.TEXT_MAGAZINE,
+            ContentType.TEXT when lowerCategory.Contains("report") => RetrievalContentType.TEXT_REPORT,
+            ContentType.TEXT when lowerCategory.Contains("review") => RetrievalContentType.TEXT_REVIEW,
+            ContentType.TEXT when lowerCategory.Contains("website") => RetrievalContentType.TEXT_WEBSITE,
+            ContentType.TEXT when lowerCategory.Contains("idea") => RetrievalContentType.TEXT_IDEA,
+
+            ContentType.TEXT when lowerCategory.Contains("business concept") => RetrievalContentType.TEXT_BUSINESS_CONCEPT, // must precede the generic "concept"
+            ContentType.TEXT when lowerCategory.Contains("concept") => RetrievalContentType.TEXT_CONCEPT,
+
+            ContentType.TEXT when lowerCategory.Contains("definition") => RetrievalContentType.TEXT_DEFINITION,
+            ContentType.TEXT when lowerCategory.Contains("example") => RetrievalContentType.TEXT_EXAMPLE,
+            ContentType.TEXT when lowerCategory.Contains("quote") => RetrievalContentType.TEXT_QUOTE,
+            ContentType.TEXT when lowerCategory.Contains("article") => RetrievalContentType.TEXT_ARTICLE,
+            ContentType.TEXT when lowerCategory.Contains("chapter") => RetrievalContentType.TEXT_CHAPTER,
+
+            ContentType.TEXT when lowerCategory.Contains("thesis") => RetrievalContentType.TEXT_THESIS,
+            ContentType.TEXT when lowerCategory.Contains("dissertation") => RetrievalContentType.TEXT_THESIS,
+
+            ContentType.TEXT when lowerCategory.Contains("draft") => RetrievalContentType.TEXT_DRAFT,
+            ContentType.TEXT when lowerCategory.Contains("transcript") => RetrievalContentType.TEXT_TRANSCRIPT, // must precede "script": "transcript" contains "script"
+            ContentType.TEXT when lowerCategory.Contains("script") => RetrievalContentType.TEXT_SCRIPT,
+            ContentType.TEXT when lowerCategory.Contains("subtitle") => RetrievalContentType.TEXT_SUBTITLE,
+            ContentType.TEXT when lowerCategory.Contains("caption") => RetrievalContentType.TEXT_CAPTION,
+            ContentType.TEXT when lowerCategory.Contains("dialogue") => RetrievalContentType.TEXT_DIALOGUE,
+            ContentType.TEXT when lowerCategory.Contains("project proposal") => RetrievalContentType.TEXT_PROJECT_PROPOSAL,
+            ContentType.TEXT when lowerCategory.Contains("project plan") => RetrievalContentType.TEXT_PROJECT_PLAN,
+            ContentType.TEXT when lowerCategory.Contains("spreadsheet") => RetrievalContentType.TEXT_SPREADSHEET,
+
+            ContentType.TEXT when lowerCategory.Contains("presentation") => RetrievalContentType.TEXT_PRESENTATION,
+            ContentType.TEXT when lowerCategory.Contains("powerpoint") => RetrievalContentType.TEXT_PRESENTATION,
+            ContentType.TEXT when lowerCategory.Contains("slide") => RetrievalContentType.TEXT_PRESENTATION,
+
+            ContentType.TEXT when lowerCategory.Contains("meeting minutes") => RetrievalContentType.TEXT_MEETING_MINUTES,
+            ContentType.TEXT when lowerCategory.Contains("email") => RetrievalContentType.TEXT_EMAIL,
+            ContentType.TEXT when lowerCategory.Contains("protocol") => RetrievalContentType.TEXT_PROTOCOL,
+
+            ContentType.TEXT => RetrievalContentType.TEXT_DOCUMENT, // fallback for text without a recognized keyword
+
+
+            ContentType.IMAGE when lowerCategory.Contains("photo") => RetrievalContentType.IMAGE_PHOTO,
+            ContentType.IMAGE when lowerCategory.Contains("illustration") => RetrievalContentType.IMAGE_ILLUSTRATION,
+            ContentType.IMAGE when lowerCategory.Contains("diagram") => RetrievalContentType.IMAGE_DIAGRAM,
+            ContentType.IMAGE when lowerCategory.Contains("chart") => RetrievalContentType.IMAGE_CHART, // must precede "art": "chart" contains "art"
+            ContentType.IMAGE when lowerCategory.Contains("earth observation") => RetrievalContentType.IMAGE_SATELLITE_IMAGE, // must precede "art": "earth" contains "art"
+            ContentType.IMAGE when lowerCategory.Contains("art") => RetrievalContentType.IMAGE_ART,
+            ContentType.IMAGE when lowerCategory.Contains("drawing") => RetrievalContentType.IMAGE_DRAWING,
+            ContentType.IMAGE when lowerCategory.Contains("painting") => RetrievalContentType.IMAGE_PAINTING,
+            ContentType.IMAGE when lowerCategory.Contains("sketch") => RetrievalContentType.IMAGE_SKETCH,
+            ContentType.IMAGE when lowerCategory.Contains("map") => RetrievalContentType.IMAGE_MAP,
+            ContentType.IMAGE when lowerCategory.Contains("scene") => RetrievalContentType.IMAGE_SCENE,
+            ContentType.IMAGE when lowerCategory.Contains("character") => RetrievalContentType.IMAGE_CHARACTER,
+            ContentType.IMAGE when lowerCategory.Contains("landscape") => RetrievalContentType.IMAGE_LANDSCAPE,
+            ContentType.IMAGE when lowerCategory.Contains("portrait") => RetrievalContentType.IMAGE_PORTRAIT,
+            ContentType.IMAGE when lowerCategory.Contains("poster") => RetrievalContentType.IMAGE_POSTER,
+            ContentType.IMAGE when lowerCategory.Contains("logo") => RetrievalContentType.IMAGE_LOGO,
+            ContentType.IMAGE when lowerCategory.Contains("icon") => RetrievalContentType.IMAGE_ICON,
+
+            ContentType.IMAGE when lowerCategory.Contains("satellite") => RetrievalContentType.IMAGE_SATELLITE_IMAGE,
+            ContentType.IMAGE when (System.Array.IndexOf(lowerCategory.Split(' ', '-', '_', '/'), "eo") >= 0) => RetrievalContentType.IMAGE_SATELLITE_IMAGE, // "EO" abbreviation, matched as a whole lower-case token so that "video" or "people" do not qualify
+
+            ContentType.IMAGE => RetrievalContentType.NOT_SPECIFIED, // fallback for images without a recognized keyword
+
+
+            ContentType.AUDIO when lowerCategory.Contains("speech") => RetrievalContentType.AUDIO_SPEECH,
+
+            ContentType.AUDIO when lowerCategory.Contains("podcast") => RetrievalContentType.AUDIO_PODCAST,
+            ContentType.SPEECH when lowerCategory.Contains("podcast") =>
RetrievalContentType.AUDIO_PODCAST, + + ContentType.AUDIO when lowerCategory.Contains("book") => RetrievalContentType.AUDIO_BOOK, + ContentType.SPEECH when lowerCategory.Contains("book") => RetrievalContentType.AUDIO_BOOK, + + ContentType.AUDIO when lowerCategory.Contains("interview") => RetrievalContentType.AUDIO_INTERVIEW, + ContentType.SPEECH when lowerCategory.Contains("interview") => RetrievalContentType.AUDIO_INTERVIEW, + + ContentType.AUDIO when lowerCategory.Contains("lecture") => RetrievalContentType.AUDIO_LECTURE, + ContentType.SPEECH when lowerCategory.Contains("lecture") => RetrievalContentType.AUDIO_LECTURE, + + ContentType.AUDIO when lowerCategory.Contains("talk") => RetrievalContentType.AUDIO_TALK, + ContentType.SPEECH when lowerCategory.Contains("talk") => RetrievalContentType.AUDIO_TALK, + + ContentType.AUDIO when lowerCategory.Contains("song") => RetrievalContentType.AUDIO_SONG, + ContentType.AUDIO when lowerCategory.Contains("music") => RetrievalContentType.AUDIO_MUSIC, + ContentType.AUDIO when lowerCategory.Contains("sound") => RetrievalContentType.AUDIO_SOUND, + ContentType.AUDIO when lowerCategory.Contains("call") => RetrievalContentType.AUDIO_CALL, + ContentType.AUDIO when lowerCategory.Contains("voice acting") => RetrievalContentType.AUDIO_VOICE_ACTING, + ContentType.AUDIO when lowerCategory.Contains("description") => RetrievalContentType.AUDIO_DESCRIPTION, + ContentType.AUDIO when lowerCategory.Contains("guide") => RetrievalContentType.AUDIO_GUIDE, + ContentType.AUDIO when lowerCategory.Contains("dialogue") => RetrievalContentType.AUDIO_DIALOGUE, + + ContentType.SPEECH => RetrievalContentType.AUDIO_SPEECH, + ContentType.AUDIO => RetrievalContentType.NOT_SPECIFIED, + + + ContentType.VIDEO when lowerCategory.Contains("movie") => RetrievalContentType.VIDEO_MOVIE, + ContentType.VIDEO when lowerCategory.Contains("film") => RetrievalContentType.VIDEO_FILM, + ContentType.VIDEO when lowerCategory.Contains("tv show") => 
RetrievalContentType.VIDEO_TV_SHOW, + ContentType.VIDEO when lowerCategory.Contains("series") => RetrievalContentType.VIDEO_SERIES, + ContentType.VIDEO when lowerCategory.Contains("episode") => RetrievalContentType.VIDEO_EPISODE, + ContentType.VIDEO when lowerCategory.Contains("documentary") => RetrievalContentType.VIDEO_DOCUMENTARY, + ContentType.VIDEO when lowerCategory.Contains("tutorial") => RetrievalContentType.VIDEO_TUTORIAL, + ContentType.VIDEO when lowerCategory.Contains("lecture") => RetrievalContentType.VIDEO_LECTURE, + ContentType.VIDEO when lowerCategory.Contains("webinar") => RetrievalContentType.VIDEO_WEBINAR, + ContentType.VIDEO when lowerCategory.Contains("game") => RetrievalContentType.VIDEO_GAME, + ContentType.VIDEO when lowerCategory.Contains("animation") => RetrievalContentType.VIDEO_ANIMATION, + ContentType.VIDEO when lowerCategory.Contains("cutscene") => RetrievalContentType.VIDEO_CUTSCENE, + ContentType.VIDEO when lowerCategory.Contains("trailer") => RetrievalContentType.VIDEO_TRAILER, + ContentType.VIDEO when lowerCategory.Contains("advertisement") => RetrievalContentType.VIDEO_ADVERTISEMENT, + + ContentType.VIDEO => RetrievalContentType.NOT_SPECIFIED, + + ContentType.NONE => RetrievalContentType.NOT_SPECIFIED, + ContentType.UNKNOWN => RetrievalContentType.UNKNOWN, + _ => RetrievalContentType.UNKNOWN, + }; + } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/RAG/RetrievalImageContext.cs b/app/MindWork AI Studio/Tools/RAG/RetrievalImageContext.cs new file mode 100644 index 0000000..37cadb9 --- /dev/null +++ b/app/MindWork AI Studio/Tools/RAG/RetrievalImageContext.cs @@ -0,0 +1,38 @@ +using AIStudio.Chat; + +namespace AIStudio.Tools.RAG; + +public sealed class RetrievalImageContext : IRetrievalContext +{ + #region Implementation of IRetrievalContext + + /// + public required string DataSourceName { get; init; } + + /// + public required RetrievalContentCategory Category { get; init; } + + /// + public required 
RetrievalContentType Type { get; init; } + + /// + public required string Path { get; init; } + + /// + public IReadOnlyList Links { get; init; } = []; + + #endregion + + /// + /// The type of the image source. + /// + /// + /// Is the image source a URL, a local file path, a base64 string, etc.? + /// + public required ContentImageSource SourceType { get; init; } + + /// + /// The image source. + /// + public required string Source { get; set; } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/RAG/RetrievalTextContext.cs b/app/MindWork AI Studio/Tools/RAG/RetrievalTextContext.cs new file mode 100644 index 0000000..9997e57 --- /dev/null +++ b/app/MindWork AI Studio/Tools/RAG/RetrievalTextContext.cs @@ -0,0 +1,43 @@ +namespace AIStudio.Tools.RAG; + +/// +/// The retrieval context for text data. +/// +public sealed class RetrievalTextContext : IRetrievalContext +{ + #region Implementation of IRetrievalContext + + /// + public required string DataSourceName { get; init; } + + /// + public required RetrievalContentCategory Category { get; init; } + + /// + public required RetrievalContentType Type { get; init; } + + /// + public required string Path { get; init; } + + /// + public IReadOnlyList Links { get; init; } = []; + + #endregion + + /// + /// The text content. + /// + /// + /// Should contain the matched text and some small context around it. + /// + public required string MatchedText { get; set; } + + /// + /// The surrounding content of the matched text. + /// + /// + /// Might give the user some context about the matched text. + /// For example, one sentence or paragraph before and after the matched text. 
+ /// + public IReadOnlyList SurroundingContent { get; set; } = []; +} \ No newline at end of file diff --git a/app/MindWork AI Studio/wwwroot/changelog/v0.9.29.md b/app/MindWork AI Studio/wwwroot/changelog/v0.9.29.md new file mode 100644 index 0000000..ecb016a --- /dev/null +++ b/app/MindWork AI Studio/wwwroot/changelog/v0.9.29.md @@ -0,0 +1 @@ +# v0.9.29, build 204 (2025-02-xx xx:xx UTC)