Refactored a target window struct with multiple strategies

This commit is contained in:
Thorsten Sommer 2025-02-22 20:02:29 +01:00
parent 1d93a697ad
commit 3b42a4ed0a
Signed by: tsommer
GPG Key ID: 371BBA77A02C0108
4 changed files with 118 additions and 11 deletions

View File

@ -2,50 +2,99 @@ namespace AIStudio.Tools;
public static class IConfidenceExtensions
{
/// <summary>
/// Builds the <see cref="TargetWindow"/> that belongs to the given strategy.
/// </summary>
/// <param name="items">The confidence items; only inspected by <see cref="TargetWindowStrategy.TOP10_BETTER_THAN_GUESSING"/>.</param>
/// <param name="strategy">The strategy that decides how the window is shaped.</param>
/// <param name="numMaximumItems">Upper limit applied to the window when the top 10% contain more than ten items.</param>
/// <typeparam name="T">The type of items in the list.</typeparam>
/// <returns>
/// The target window for the strategy. For <see cref="TargetWindowStrategy.NONE"/> and any
/// unknown strategy, all item counts are -1.
/// </returns>
public static TargetWindow DetermineTargetWindow<T>(this IReadOnlyList<T> items, TargetWindowStrategy strategy, int numMaximumItems = 30) where T : IConfidence
{
    const float GUESSING_LEVEL = 0.5f;
    const int MIN_NUM_ITEMS = 3;

    // Fixed window: one item at least, ideally two to three, no lower threshold bound.
    if (strategy is TargetWindowStrategy.A_FEW_GOOD_ONES)
        return new TargetWindow(1, 2, 3, 0f);

    // NONE and every strategy we do not know end up with the all-negative window:
    if (strategy is not TargetWindowStrategy.TOP10_BETTER_THAN_GUESSING)
        return new TargetWindow(-1, -1, -1, 0f);

    // From here on: TOP10_BETTER_THAN_GUESSING.
    var candidates = items.Count(item => item.Confidence > GUESSING_LEVEL);

    // Too few candidates; use the small fixed window, but keep the 0.5 bound:
    if (candidates < MIN_NUM_ITEMS)
        return new TargetWindow(1, 2, 3, GUESSING_LEVEL);

    // The top 10% of all candidates (rounded down):
    var topTenth = (int) MathF.Floor(candidates * 0.1f);

    // A small top 10%: the window starts right above half of the
    // candidates and reaches up to all of them.
    if (topTenth <= 10)
    {
        var half = (int) MathF.Floor(candidates * 0.5f);
        var upperEdgeSmall = Math.Max(candidates, candidates - topTenth);
        return new TargetWindow(half, half + 1, upperEdgeSmall, GUESSING_LEVEL);
    }

    // A large top 10%: the window starts at the top 10% (at least
    // MIN_NUM_ITEMS + 1, at most numMaximumItems - 1) ...
    var lowerEdge = Math.Min(Math.Max(MIN_NUM_ITEMS + 1, topTenth), numMaximumItems - 1);

    // ... the absolute minimum sits three items below the window, but never below MIN_NUM_ITEMS ...
    var absoluteMinimum = Math.Max(MIN_NUM_ITEMS, lowerEdge - 3);

    // ... and the window spans a tenth of the top 10% or ten items, capped by numMaximumItems.
    var span = (int) MathF.Max(MathF.Floor(topTenth * 0.1f), MathF.Min(10, topTenth));
    var upperEdge = Math.Min(numMaximumItems, topTenth + span);

    return new TargetWindow(absoluteMinimum, lowerEdge, upperEdge, GUESSING_LEVEL);
}
/// <summary>
/// Determine the optimal confidence threshold for a list of items
/// in order to match a target window of number of items.
/// </summary>
/// <remarks>
/// The search walks from the lower bound towards the highest confidence in
/// <paramref name="maxSteps"/> equally sized steps and stops as soon as the number
/// of matching items is inside the target window. 0f is returned when the target
/// window is invalid, when there are no items, or when the confidence values are
/// closer together than 0.01.
/// </remarks>
/// <param name="items">The list of confidence items to analyze.</param>
/// <param name="targetWindow">The target window for the number of items.</param>
/// <param name="maxSteps">The maximum number of steps to search for the threshold.</param>
/// <typeparam name="T">The type of items in the list.</typeparam>
/// <returns>The confidence threshold.</returns>
public static float GetConfidenceThreshold<T>(this IReadOnlyList<T> items, TargetWindow targetWindow, int maxSteps = 10) where T : IConfidence
{
    if(!targetWindow.IsValid())
    {
        var logger = Program.SERVICE_PROVIDER.GetService<ILogger<IConfidence>>()!;
        logger.LogWarning("The target window is invalid. Returning 0f as threshold.");
        return 0f;
    }

    // Min() and Max() throw on an empty sequence. Without items,
    // there is nothing to filter, so every threshold is fine:
    if (items.Count == 0)
        return 0f;

    var confidenceValues = items.Select(x => x.Confidence).ToList();
    var minConfidence = confidenceValues.Min();

    // The search never starts below the minimum threshold of the window:
    var lowerBound = MathF.Max(minConfidence, targetWindow.MinThreshold);
    var upperBound = confidenceValues.Max();

    //
    // We search for a threshold so that the number of matching items
    // is inside the target window. When not possible, we take all
    // items (e.g., threshold = 0f; depends on the used window strategy)
    //
    var threshold = 0.0f;

    // Check the case where the confidence values are too close:
    if (upperBound - minConfidence >= 0.01)
    {
        var previousThreshold = threshold;
        for (var i = 0; i < maxSteps; i++)
        {
            threshold = lowerBound + (upperBound - lowerBound) * i / maxSteps;
            var numMatches = items.Count(x => x.Confidence >= threshold);

            // Too few items left: fall back to the threshold of the previous step.
            if (numMatches <= targetWindow.NumMinItems)
            {
                threshold = previousThreshold;
                break;
            }

            if (targetWindow.InsideWindow(numMatches))
                break;

            previousThreshold = threshold;
        }
    }
    else
    {
        var logger = Program.SERVICE_PROVIDER.GetService<ILogger<IConfidence>>()!;
        logger.LogWarning("The confidence values are too close. Returning 0f as threshold.");
    }

    return threshold;
}

View File

@ -71,7 +71,8 @@ public class AgenticSrcSelWithDynHeur : IDataSourceSelectionProcess
if (aiSelectedDataSources.Count > 3)
{
// We have more than 3 data sources. Let's filter by confidence:
var threshold = aiSelectedDataSources.GetConfidenceThreshold();
var targetWindow = aiSelectedDataSources.DetermineTargetWindow(TargetWindowStrategy.A_FEW_GOOD_ONES);
var threshold = aiSelectedDataSources.GetConfidenceThreshold(targetWindow);
//
// Filter the data sources by the threshold:

View File

@ -0,0 +1,38 @@
namespace AIStudio.Tools;
/// <summary>
/// Represents a target window for the number of items to match a threshold.
/// </summary>
/// <param name="NumMinItems">The minimum number of items to match the threshold. Should be at least one and less than <paramref name="TargetWindowMin"/>.</param>
/// <param name="TargetWindowMin">The minimum number of items in the target window. Should be at least 2 and more than <paramref name="NumMinItems"/>.</param>
/// <param name="TargetWindowMax">The maximum number of items in the target window.</param>
/// <param name="MinThreshold">The minimum confidence threshold that belongs to this window. Must be a number between 0 and 1 (inclusive).</param>
public readonly record struct TargetWindow(int NumMinItems, int TargetWindowMin, int TargetWindowMax, float MinThreshold)
{
    /// <summary>
    /// Determines if the target window is valid.
    /// </summary>
    /// <remarks>
    /// A window is valid when it requires at least one item, when its bounds are
    /// ordered (NumMinItems &lt;= TargetWindowMin &lt;= TargetWindowMax), and when
    /// the minimum threshold is a number between 0 and 1.
    /// </remarks>
    /// <returns>True when the target window is valid; otherwise, false.</returns>
    public bool IsValid()
    {
        if(this.NumMinItems < 1)
            return false;

        if(this.TargetWindowMin < this.NumMinItems)
            return false;

        if(this.TargetWindowMax < this.TargetWindowMin)
            return false;

        // NaN must be checked explicitly: every relational comparison
        // with NaN is false, so the range pattern alone would accept it.
        if(float.IsNaN(this.MinThreshold) || this.MinThreshold is < 0f or > 1f)
            return false;

        return true;
    }

    /// <summary>
    /// Determines if the number of items is inside the target window.
    /// </summary>
    /// <param name="numItems">The number of items to check.</param>
    /// <returns>True when the number of items is inside the target window (both bounds inclusive); otherwise, false.</returns>
    public bool InsideWindow(int numItems) => numItems >= this.TargetWindowMin && numItems <= this.TargetWindowMax;
}

View File

@ -0,0 +1,19 @@
namespace AIStudio.Tools;
/// <summary>
/// The available strategies for determining a target window, i.e., how
/// many items should match a confidence threshold.
/// </summary>
public enum TargetWindowStrategy
{
/// <summary>
/// Means no target window strategy, which will effectively return all items.
/// </summary>
NONE,
/// <summary>
/// Searches for two to three items, but at least one.
/// </summary>
A_FEW_GOOD_ONES,
/// <summary>
/// Searches for the top 10% of the items that are better than guessing, i.e., with a confidence greater than 0.5f.
/// </summary>
TOP10_BETTER_THAN_GUESSING,
}