Refactored the algorithm to determine an optimal confidence threshold into a common extension method

This commit is contained in:
Thorsten Sommer 2025-02-19 11:30:33 +01:00
parent 129e0d0779
commit 9c446c54ec
Signed by: tsommer
GPG Key ID: 371BBA77A02C0108
5 changed files with 72 additions and 37 deletions

View File

@ -6,4 +6,4 @@ namespace AIStudio.Agents;
/// <param name="Decision">Whether the retrieval context is useful or not.</param>
/// <param name="Reason">The reason for the decision.</param>
/// <param name="Confidence">The confidence of the decision.</param>
public readonly record struct RetrievalContextValidationResult(bool Decision, string Reason, float Confidence);
public readonly record struct RetrievalContextValidationResult(bool Decision, string Reason, float Confidence) : IConfidence;

View File

@ -6,4 +6,4 @@ namespace AIStudio.Agents;
/// <param name="Id">The data source ID.</param>
/// <param name="Reason">The reason for selecting the data source.</param>
/// <param name="Confidence">The confidence of the agent in the selection.</param>
public readonly record struct SelectedDataSource(string Id, string Reason, float Confidence);
public readonly record struct SelectedDataSource(string Id, string Reason, float Confidence) : IConfidence;

View File

@ -0,0 +1,16 @@
namespace AIStudio.Tools;
/// <summary>
/// A contract for data classes with a confidence value.
/// </summary>
/// <remarks>
/// Using this confidence contract allows us to provide
/// algorithms based on confidence values.
/// </remarks>
public interface IConfidence
{
/// <summary>
/// How confident is the AI in this task or decision?
/// </summary>
/// <remarks>
/// The property is init-only: it can be assigned during object
/// initialization, but not afterwards.
/// </remarks>
public float Confidence { get; init; }
}

View File

@ -0,0 +1,52 @@
namespace AIStudio.Tools;
/// <summary>
/// Extension methods for lists of <see cref="IConfidence"/> items.
/// </summary>
public static class IConfidenceExtensions
{
    /// <summary>
    /// Determine the optimal confidence threshold for a list of items
    /// in order to match a target window of number of items.
    /// </summary>
    /// <remarks>
    /// The search walks from the lowest confidence value towards the highest one
    /// in <paramref name="maxSteps"/> equally sized steps. At each step, the items
    /// with a confidence greater than or equal to the candidate threshold are counted:
    /// <list type="bullet">
    /// <item>When the count is inside the target window, the candidate is returned.</item>
    /// <item>When the count drops to <paramref name="numMinItems"/> or below, the candidate of
    /// the previous step is returned (0 when there was no previous step).</item>
    /// <item>When all steps are used up without hitting either case, the last candidate is returned.</item>
    /// </list>
    /// A threshold of 0 is also returned when the list is empty, when the confidence
    /// values span less than 0.01, or when <paramref name="maxSteps"/> is not positive.
    /// </remarks>
    /// <param name="items">The list of confidence items to analyze.</param>
    /// <param name="targetWindowMin">The minimum number of items in the target window. Should be at least 2 and more than numMinItems.</param>
    /// <param name="targetWindowMax">The maximum number of items in the target window.</param>
    /// <param name="numMinItems">The minimum number of items to match the threshold. Should be at least 1 and less than targetWindowMin.</param>
    /// <param name="maxSteps">The maximum number of steps to search for the threshold.</param>
    /// <typeparam name="T">The type of items in the list.</typeparam>
    /// <returns>The confidence threshold. Items with a confidence greater than or equal to it are considered a match.</returns>
    public static float GetConfidenceThreshold<T>(this IList<T> items, int targetWindowMin = 2, int targetWindowMax = 3, int numMinItems = 1, int maxSteps = 10) where T : IConfidence
    {
        var confidenceValues = items.Select(x => x.Confidence).ToList();

        // Without any items there is nothing to analyze. Min() and Max()
        // would throw for an empty sequence, so we return the neutral
        // threshold instead:
        if (confidenceValues.Count == 0)
            return 0.0f;

        var lowerBound = confidenceValues.Min();
        var upperBound = confidenceValues.Max();
        var range = upperBound - lowerBound;

        //
        // We search for a threshold so that we have between
        // targetWindowMin and targetWindowMax items. The default
        // threshold of 0f is kept when the search is skipped.
        //
        var threshold = 0.0f;

        // Skip the search when the confidence values are too close:
        if (range >= 0.01)
        {
            var previousThreshold = 0.0f;
            for (var i = 0; i < maxSteps; i++)
            {
                // Step i of maxSteps on the way from the lower to the upper bound:
                threshold = lowerBound + range * i / maxSteps;
                var numMatches = confidenceValues.Count(confidence => confidence >= threshold);

                // Too few items are left: fall back to the previous step.
                if (numMatches <= numMinItems)
                {
                    threshold = previousThreshold;
                    break;
                }

                // The number of matching items is inside the target window:
                if (numMatches <= targetWindowMax && numMatches >= targetWindowMin)
                    break;

                previousThreshold = threshold;
            }
        }

        return threshold;
    }
}

View File

@ -70,41 +70,8 @@ public class AgenticSrcSelWithDynHeur : IDataSourceSelectionProcess
if (aiSelectedDataSources.Count > 3)
{
//
// We have more than 3 data sources. Let's filter by confidence.
// In order to do that, we must identify the lower and upper
// bounds of the confidence interval:
//
var confidenceValues = aiSelectedDataSources.Select(x => x.Confidence).ToList();
var lowerBound = confidenceValues.Min();
var upperBound = confidenceValues.Max();
//
// Next, we search for a threshold so that we have between 2 and 3
// data sources. When not possible, we take all data sources.
//
var threshold = 0.0f;
// Check the case where the confidence values are too close:
if (upperBound - lowerBound >= 0.01)
{
var previousThreshold = 0.0f;
for (var i = 0; i < 10; i++)
{
threshold = lowerBound + (upperBound - lowerBound) * i / 10;
var numMatches = aiSelectedDataSources.Count(x => x.Confidence >= threshold);
if (numMatches <= 1)
{
threshold = previousThreshold;
break;
}
if (numMatches is <= 3 and >= 2)
break;
previousThreshold = threshold;
}
}
// We have more than 3 data sources. Let's filter by confidence:
var threshold = aiSelectedDataSources.GetConfidenceThreshold();
//
// Filter the data sources by the threshold: