diff --git a/app/MindWork AI Studio/Tools/IConfidenceExtensions.cs b/app/MindWork AI Studio/Tools/IConfidenceExtensions.cs index 1e1c7d4b..f6f15bfd 100644 --- a/app/MindWork AI Studio/Tools/IConfidenceExtensions.cs +++ b/app/MindWork AI Studio/Tools/IConfidenceExtensions.cs @@ -2,50 +2,99 @@ namespace AIStudio.Tools; public static class IConfidenceExtensions { + public static TargetWindow DetermineTargetWindow(this IReadOnlyList items, TargetWindowStrategy strategy, int numMaximumItems = 30) where T : IConfidence + { + switch (strategy) + { + case TargetWindowStrategy.A_FEW_GOOD_ONES: + return new(1, 2, 3, 0f); + + case TargetWindowStrategy.TOP10_BETTER_THAN_GUESSING: + var numItemsBetterThanGuessing = items.Count(x => x.Confidence > 0.5f); + if(numItemsBetterThanGuessing < 3) + return new(1, 2, 3, 0.5f); + + // We want the top 10% of items better than guessing: + var numTop10Percent = (int) MathF.Floor(numItemsBetterThanGuessing * 0.1f); + + // When these 10% are just a few items, we take them all: + if (numTop10Percent <= 10) + { + var diff = numItemsBetterThanGuessing - numTop10Percent; + var num50Percent = (int) MathF.Floor(numItemsBetterThanGuessing * 0.5f); + return new(num50Percent, num50Percent + 1, Math.Max(numItemsBetterThanGuessing, diff), 0.5f); + } + + // Let's define the size of the window: + const int MIN_NUM_ITEMS = 3; + var windowMin = Math.Max(MIN_NUM_ITEMS + 1, numTop10Percent); + windowMin = Math.Min(windowMin, numMaximumItems - 1); + var totalMin = Math.Max(MIN_NUM_ITEMS, windowMin - 3); + var windowSize = (int)MathF.Max(MathF.Floor(numTop10Percent * 0.1f), MathF.Min(10, numTop10Percent)); + var windowMax = Math.Min(numMaximumItems, numTop10Percent + windowSize); + return new(totalMin, windowMin, windowMax, 0.5f); + + case TargetWindowStrategy.NONE: + default: + return new(-1, -1, -1, 0f); + } + } + /// /// Determine the optimal confidence threshold for a list of items /// in order to match a target window of number of items. 
/// /// The list of confidence items to analyze. - /// The minimum number of items in the target window. Should be at least 2 and more than numMinItems. - /// The maximum number of items in the target window. - /// The minimum number of items to match the threshold. Should be at least 1 and less than targetWindowMin. + /// The target window for the number of items. /// The maximum number of steps to search for the threshold. /// The type of items in the list. /// The confidence threshold. - public static float GetConfidenceThreshold(this IList items, int targetWindowMin = 2, int targetWindowMax = 3, int numMinItems = 1, int maxSteps = 10) where T : IConfidence + public static float GetConfidenceThreshold(this IReadOnlyList items, TargetWindow targetWindow, int maxSteps = 10) where T : IConfidence { + if(!targetWindow.IsValid()) + { + var logger = Program.SERVICE_PROVIDER.GetService>()!; + logger.LogWarning("The target window is invalid. Returning 0f as threshold."); + return 0f; + } + var confidenceValues = items.Select(x => x.Confidence).ToList(); - var lowerBound = confidenceValues.Min(); + var minConfidence = confidenceValues.Min(); + var lowerBound = MathF.Max(minConfidence, targetWindow.MinThreshold); var upperBound = confidenceValues.Max(); // // We search for a threshold so that we have between // targetWindowMin and targetWindowMax items. 
When not - // possible, we take all items (i.e., threshold = 0f) + // possible, we take all items (e.g., threshold = 0f; depends on the used window strategy) // var threshold = 0.0f; // Check the case where the confidence values are too close: - if (upperBound - lowerBound >= 0.01) + if (upperBound - minConfidence >= 0.01) { - var previousThreshold = 0.0f; + var previousThreshold = threshold; for (var i = 0; i < maxSteps; i++) { threshold = lowerBound + (upperBound - lowerBound) * i / maxSteps; var numMatches = items.Count(x => x.Confidence >= threshold); - if (numMatches <= numMinItems) + if (numMatches <= targetWindow.NumMinItems) { threshold = previousThreshold; break; } - if (numMatches <= targetWindowMax && numMatches >= targetWindowMin) + if (targetWindow.InsideWindow(numMatches)) break; previousThreshold = threshold; } } + else + { + var logger = Program.SERVICE_PROVIDER.GetService>()!; + logger.LogWarning("The confidence values are too close. Returning 0f as threshold."); + } return threshold; } diff --git a/app/MindWork AI Studio/Tools/RAG/DataSourceSelectionProcesses/AgenticSrcSelWithDynHeur.cs b/app/MindWork AI Studio/Tools/RAG/DataSourceSelectionProcesses/AgenticSrcSelWithDynHeur.cs index 43f8d05c..76f27892 100644 --- a/app/MindWork AI Studio/Tools/RAG/DataSourceSelectionProcesses/AgenticSrcSelWithDynHeur.cs +++ b/app/MindWork AI Studio/Tools/RAG/DataSourceSelectionProcesses/AgenticSrcSelWithDynHeur.cs @@ -71,7 +71,8 @@ public class AgenticSrcSelWithDynHeur : IDataSourceSelectionProcess if (aiSelectedDataSources.Count > 3) { // We have more than 3 data sources. 
Let's filter by confidence: - var threshold = aiSelectedDataSources.GetConfidenceThreshold(); + var targetWindow = aiSelectedDataSources.DetermineTargetWindow(TargetWindowStrategy.A_FEW_GOOD_ONES); + var threshold = aiSelectedDataSources.GetConfidenceThreshold(targetWindow); // // Filter the data sources by the threshold: diff --git a/app/MindWork AI Studio/Tools/TargetWindow.cs b/app/MindWork AI Studio/Tools/TargetWindow.cs new file mode 100644 index 00000000..8530bacb --- /dev/null +++ b/app/MindWork AI Studio/Tools/TargetWindow.cs @@ -0,0 +1,38 @@ +namespace AIStudio.Tools; + +/// +/// Represents a target window for the number of items to match a threshold. +/// +/// The minimum number of items to match the threshold. Should be at least one and less than targetWindowMin. +/// The minimum number of items in the target window. Should be at least 2 and more than numMinItems. +/// The maximum number of items in the target window. +public readonly record struct TargetWindow(int NumMinItems, int TargetWindowMin, int TargetWindowMax, float MinThreshold) +{ + /// + /// Determines if the target window is valid. + /// + /// True when the target window is valid; otherwise, false. + public bool IsValid() + { + if(this.NumMinItems < 1) + return false; + + if(this.TargetWindowMin < this.NumMinItems) + return false; + + if(this.TargetWindowMax < this.TargetWindowMin) + return false; + + if(this.MinThreshold is < 0f or > 1f) + return false; + + return true; + } + + /// + /// Determines if the number of items is inside the target window. + /// + /// The number of items to check. + /// True when the number of items is inside the target window; otherwise, false. 
+ public bool InsideWindow(int numItems) => numItems >= this.TargetWindowMin && numItems <= this.TargetWindowMax; +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/TargetWindowStrategy.cs b/app/MindWork AI Studio/Tools/TargetWindowStrategy.cs new file mode 100644 index 00000000..90549842 --- /dev/null +++ b/app/MindWork AI Studio/Tools/TargetWindowStrategy.cs @@ -0,0 +1,19 @@ +namespace AIStudio.Tools; + +public enum TargetWindowStrategy +{ + /// + /// Means no target window strategy, which will effectively return all items. + /// + NONE, + + /// + /// Searches for two to three items, but at least one. + /// + A_FEW_GOOD_ONES, + + /// + /// Searches for the top 10% of items that are better than guessing, i.e., with confidence greater than 0.5f. + /// + TOP10_BETTER_THAN_GUESSING, +} \ No newline at end of file