Refactored the algorithm to determine an optimal confidence threshold into a common extension method

2025-11-15 04:40:20 +00:00 · 2025-02-19 11:30:33 +01:00 · 2025-02-19 11:30:33 +01:00 · 9c446c54ec
commit 9c446c54ec
parent 129e0d0779
5 changed files with 72 additions and 37 deletions
--- a/Studio/Agents/RetrievalContextValidationResult.cs
+++ b/Studio/Agents/RetrievalContextValidationResult.cs
@ -6,4 +6,4 @@ namespace AIStudio.Agents;
 /// <param name="Decision">Whether the retrieval context is useful or not.</param>
 /// <param name="Reason">The reason for the decision.</param>
 /// <param name="Confidence">The confidence of the decision.</param>
-public readonly record struct RetrievalContextValidationResult(bool Decision, string Reason, float Confidence);
+public readonly record struct RetrievalContextValidationResult(bool Decision, string Reason, float Confidence) : IConfidence;
--- a/Studio/Agents/SelectedDataSource.cs
+++ b/Studio/Agents/SelectedDataSource.cs
@ -6,4 +6,4 @@ namespace AIStudio.Agents;
 /// <param name="Id">The data source ID.</param>
 /// <param name="Reason">The reason for selecting the data source.</param>
 /// <param name="Confidence">The confidence of the agent in the selection.</param>
-public readonly record struct SelectedDataSource(string Id, string Reason, float Confidence);
+public readonly record struct SelectedDataSource(string Id, string Reason, float Confidence) : IConfidence;
--- a/Studio/Tools/IConfidence.cs
+++ b/Studio/Tools/IConfidence.cs
@ -0,0 +1,16 @@
 namespace AIStudio.Tools;
 /// <summary>
 /// A contract for data classes with a confidence value.
 /// </summary>
 /// <remarks>
 /// Using this confidence contract allows us to provide
 /// algorithms based on confidence values, independent of
 /// the concrete data type that carries the value.
 /// </remarks>
 public interface IConfidence
 {
    /// <summary>
    /// How confident is the AI in this task or decision?
    /// </summary>
    /// <remarks>
    /// The accessor is init-only: the value is assigned when the
    /// object is created and cannot be changed afterwards.
    /// </remarks>
    public float Confidence { get; init; }
 }
--- a/Studio/Tools/IConfidenceExtensions.cs
+++ b/Studio/Tools/IConfidenceExtensions.cs
@ -0,0 +1,52 @@
 namespace AIStudio.Tools;
 /// <summary>
 /// Algorithms that work on any collection of <see cref="IConfidence"/> items.
 /// </summary>
 public static class IConfidenceExtensions
 {
    /// <summary>
    /// Determine the optimal confidence threshold for a list of items
    /// in order to match a target window of number of items.
    /// </summary>
    /// <remarks>
    /// The search walks from the lowest towards the highest confidence value in
    /// <paramref name="maxSteps"/> equally sized steps and counts, for each step,
    /// how many items have a confidence greater than or equal to the candidate
    /// threshold. It stops as soon as the count falls into the target window.
    /// When the count drops to <paramref name="numMinItems"/> or below, the
    /// threshold of the previous step is returned instead. A threshold of 0 is
    /// returned when the list is empty, when the confidence values are less
    /// than 0.01 apart, or when <paramref name="maxSteps"/> is not positive.
    /// When all steps are used up without reaching the target window, the last
    /// probed threshold is returned.
    /// </remarks>
    /// <param name="items">The list of confidence items to analyze.</param>
    /// <param name="targetWindowMin">The minimum number of items in the target window. Should be at least 2 and more than numMinItems.</param>
    /// <param name="targetWindowMax">The maximum number of items in the target window.</param>
    /// <param name="numMinItems">The minimum number of items to match the threshold. Should be at least 1 and less than targetWindowMin.</param>
    /// <param name="maxSteps">The maximum number of steps to search for the threshold.</param>
    /// <typeparam name="T">The type of items in the list.</typeparam>
    /// <returns>The confidence threshold.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="items"/> is null.</exception>
    public static float GetConfidenceThreshold<T>(this IList<T> items, int targetWindowMin = 2, int targetWindowMax = 3, int numMinItems = 1, int maxSteps = 10) where T : IConfidence
    {
        ArgumentNullException.ThrowIfNull(items);
        //
        // Without any items, there are no bounds to determine: Min() and
        // Max() would throw on an empty sequence. We fall back to the
        // "take all items" threshold instead.
        //
        if (items.Count == 0)
            return 0.0f;
        var confidenceValues = items.Select(x => x.Confidence).ToList();
        var lowerBound = confidenceValues.Min();
        var upperBound = confidenceValues.Max();
        //
        // We search for a threshold so that we have between
        // targetWindowMin and targetWindowMax items. When the
        // search cannot start, we take all items (i.e., threshold = 0f)
        //
        var threshold = 0.0f;
        // Check the case where the confidence values are too close:
        if (upperBound - lowerBound >= 0.01)
        {
            var previousThreshold = 0.0f;
            for (var i = 0; i < maxSteps; i++)
            {
                // Step i of maxSteps on the way from the lower to the upper bound:
                threshold = lowerBound + (upperBound - lowerBound) * i / maxSteps;
                var numMatches = confidenceValues.Count(confidence => confidence >= threshold);
                // Too few items are left; the previous step was the better choice:
                if (numMatches <= numMinItems)
                {
                    threshold = previousThreshold;
                    break;
                }
                // We are inside the target window; keep this threshold:
                if (numMatches <= targetWindowMax && numMatches >= targetWindowMin)
                    break;
                previousThreshold = threshold;
            }
        }
        return threshold;
    }
 }
--- a/Studio/Tools/RAG/DataSourceSelectionProcesses/AgenticSrcSelWithDynHeur.cs
+++ b/Studio/Tools/RAG/DataSourceSelectionProcesses/AgenticSrcSelWithDynHeur.cs
@ -70,41 +70,8 @@ public class AgenticSrcSelWithDynHeur : IDataSourceSelectionProcess
            if (aiSelectedDataSources.Count > 3)
            {
-                //
+                // We have more than 3 data sources. Let's filter by confidence:
-                // We have more than 3 data sources. Let's filter by confidence.
+                var threshold = aiSelectedDataSources.GetConfidenceThreshold();
                // In order to do that, we must identify the lower and upper
                // bounds of the confidence interval:
                //
                var confidenceValues = aiSelectedDataSources.Select(x => x.Confidence).ToList();
                var lowerBound = confidenceValues.Min();
                var upperBound = confidenceValues.Max();
                //
                // Next, we search for a threshold so that we have between 2 and 3
                // data sources. When not possible, we take all data sources.
                //
                var threshold = 0.0f;
                // Check the case where the confidence values are too close:
                if (upperBound - lowerBound >= 0.01)
                {
                    var previousThreshold = 0.0f;
                    for (var i = 0; i < 10; i++)
                    {
                        threshold = lowerBound + (upperBound - lowerBound) * i / 10;
                        var numMatches = aiSelectedDataSources.Count(x => x.Confidence >= threshold);
                        if (numMatches <= 1)
                        {
                            threshold = previousThreshold;
                            break;
                        }
                        if (numMatches is <= 3 and >= 2)
                            break;
                        previousThreshold = threshold;
                    }
                }
                //
                // Filter the data sources by the threshold: