From 9c446c54ecd611b3fb9a0410cfe511c8cc19684c Mon Sep 17 00:00:00 2001 From: Thorsten Sommer Date: Wed, 19 Feb 2025 11:30:33 +0100 Subject: [PATCH] Refactored the algorithm to determine an optimal confidence threshold into a common extension method --- .../RetrievalContextValidationResult.cs | 2 +- .../Agents/SelectedDataSource.cs | 2 +- app/MindWork AI Studio/Tools/IConfidence.cs | 16 ++++++ .../Tools/IConfidenceExtensions.cs | 52 +++++++++++++++++++ .../AgenticSrcSelWithDynHeur.cs | 37 +------------ 5 files changed, 72 insertions(+), 37 deletions(-) create mode 100644 app/MindWork AI Studio/Tools/IConfidence.cs create mode 100644 app/MindWork AI Studio/Tools/IConfidenceExtensions.cs diff --git a/app/MindWork AI Studio/Agents/RetrievalContextValidationResult.cs b/app/MindWork AI Studio/Agents/RetrievalContextValidationResult.cs index 4b3bd810..418e59ee 100644 --- a/app/MindWork AI Studio/Agents/RetrievalContextValidationResult.cs +++ b/app/MindWork AI Studio/Agents/RetrievalContextValidationResult.cs @@ -6,4 +6,4 @@ namespace AIStudio.Agents; /// Whether the retrieval context is useful or not. /// The reason for the decision. /// The confidence of the decision. -public readonly record struct RetrievalContextValidationResult(bool Decision, string Reason, float Confidence); \ No newline at end of file +public readonly record struct RetrievalContextValidationResult(bool Decision, string Reason, float Confidence) : IConfidence; \ No newline at end of file diff --git a/app/MindWork AI Studio/Agents/SelectedDataSource.cs b/app/MindWork AI Studio/Agents/SelectedDataSource.cs index c8b7192d..ca2c2a9f 100644 --- a/app/MindWork AI Studio/Agents/SelectedDataSource.cs +++ b/app/MindWork AI Studio/Agents/SelectedDataSource.cs @@ -6,4 +6,4 @@ namespace AIStudio.Agents; /// The data source ID. /// The reason for selecting the data source. /// The confidence of the agent in the selection. 
-public readonly record struct SelectedDataSource(string Id, string Reason, float Confidence); \ No newline at end of file +public readonly record struct SelectedDataSource(string Id, string Reason, float Confidence) : IConfidence; \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/IConfidence.cs b/app/MindWork AI Studio/Tools/IConfidence.cs new file mode 100644 index 00000000..1279ebee --- /dev/null +++ b/app/MindWork AI Studio/Tools/IConfidence.cs @@ -0,0 +1,16 @@ +namespace AIStudio.Tools; + +/// +/// A contract for data classes with a confidence value. +/// +/// +/// Using this confidence contract allows us to provide +/// algorithms based on confidence values. +/// +public interface IConfidence +{ + /// + /// How confident is the AI in this task or decision? + /// + public float Confidence { get; init; } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/IConfidenceExtensions.cs b/app/MindWork AI Studio/Tools/IConfidenceExtensions.cs new file mode 100644 index 00000000..1e1c7d4b --- /dev/null +++ b/app/MindWork AI Studio/Tools/IConfidenceExtensions.cs @@ -0,0 +1,52 @@ +namespace AIStudio.Tools; + +public static class IConfidenceExtensions +{ + /// + /// Determine the optimal confidence threshold for a list of items + /// in order to match a target window of number of items. + /// + /// The list of confidence items to analyze. + /// The minimum number of items in the target window. Should be at least 2 and more than numMinItems. + /// The maximum number of items in the target window. + /// The minimum number of items to match the threshold. Should be at least 1 and less than targetWindowMin. + /// The maximum number of steps to search for the threshold. + /// The type of items in the list. + /// The confidence threshold. 
+ public static float GetConfidenceThreshold(this IList items, int targetWindowMin = 2, int targetWindowMax = 3, int numMinItems = 1, int maxSteps = 10) where T : IConfidence + { + var confidenceValues = items.Select(x => x.Confidence).ToList(); + var lowerBound = confidenceValues.Min(); + var upperBound = confidenceValues.Max(); + + // + // We search for a threshold so that we have between + // targetWindowMin and targetWindowMax items. When not + // possible, we take all items (i.e., threshold = 0f) + // + var threshold = 0.0f; + + // Check the case where the confidence values are too close: + if (upperBound - lowerBound >= 0.01) + { + var previousThreshold = 0.0f; + for (var i = 0; i < maxSteps; i++) + { + threshold = lowerBound + (upperBound - lowerBound) * i / maxSteps; + var numMatches = items.Count(x => x.Confidence >= threshold); + if (numMatches <= numMinItems) + { + threshold = previousThreshold; + break; + } + + if (numMatches <= targetWindowMax && numMatches >= targetWindowMin) + break; + + previousThreshold = threshold; + } + } + + return threshold; + } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/RAG/DataSourceSelectionProcesses/AgenticSrcSelWithDynHeur.cs b/app/MindWork AI Studio/Tools/RAG/DataSourceSelectionProcesses/AgenticSrcSelWithDynHeur.cs index 64099780..43f8d05c 100644 --- a/app/MindWork AI Studio/Tools/RAG/DataSourceSelectionProcesses/AgenticSrcSelWithDynHeur.cs +++ b/app/MindWork AI Studio/Tools/RAG/DataSourceSelectionProcesses/AgenticSrcSelWithDynHeur.cs @@ -70,41 +70,8 @@ public class AgenticSrcSelWithDynHeur : IDataSourceSelectionProcess if (aiSelectedDataSources.Count > 3) { - // - // We have more than 3 data sources. Let's filter by confidence. 
- // In order to do that, we must identify the lower and upper - // bounds of the confidence interval: - // - var confidenceValues = aiSelectedDataSources.Select(x => x.Confidence).ToList(); - var lowerBound = confidenceValues.Min(); - var upperBound = confidenceValues.Max(); - - // - // Next, we search for a threshold so that we have between 2 and 3 - // data sources. When not possible, we take all data sources. - // - var threshold = 0.0f; - - // Check the case where the confidence values are too close: - if (upperBound - lowerBound >= 0.01) - { - var previousThreshold = 0.0f; - for (var i = 0; i < 10; i++) - { - threshold = lowerBound + (upperBound - lowerBound) * i / 10; - var numMatches = aiSelectedDataSources.Count(x => x.Confidence >= threshold); - if (numMatches <= 1) - { - threshold = previousThreshold; - break; - } - - if (numMatches is <= 3 and >= 2) - break; - - previousThreshold = threshold; - } - } + // We have more than 3 data sources. Let's filter by confidence: + var threshold = aiSelectedDataSources.GetConfidenceThreshold(); // // Filter the data sources by the threshold: