mirror of
https://github.com/MindWorkAI/AI-Studio.git
synced 2025-04-28 11:59:48 +00:00
Refactored a target window struct with multiple strategies
This commit is contained in:
parent
1d93a697ad
commit
3b42a4ed0a
@ -2,50 +2,99 @@ namespace AIStudio.Tools;
|
||||
|
||||
public static class IConfidenceExtensions
|
||||
{
|
||||
/// <summary>
/// Derives the target window (minimum number of items, window bounds, and
/// minimum threshold) that belongs to the given strategy.
/// </summary>
/// <param name="items">The confidence items. They are only inspected for the TOP10_BETTER_THAN_GUESSING strategy.</param>
/// <param name="strategy">The strategy used to derive the window.</param>
/// <param name="numMaximumItems">The cap applied to the window bounds when the top 10% contain more than ten items.</param>
/// <typeparam name="T">The type of items in the list.</typeparam>
/// <returns>The target window. For NONE and any unknown strategy, the window (-1, -1, -1, 0f) is returned.</returns>
public static TargetWindow DetermineTargetWindow<T>(this IReadOnlyList<T> items, TargetWindowStrategy strategy, int numMaximumItems = 30) where T : IConfidence
{
    // A small, fixed window that does not depend on the items:
    if (strategy is TargetWindowStrategy.A_FEW_GOOD_ONES)
        return new(1, 2, 3, 0f);

    // NONE and every unknown strategy end up with the same window:
    if (strategy is not TargetWindowStrategy.TOP10_BETTER_THAN_GUESSING)
        return new(-1, -1, -1, 0f);

    const float GUESSING_LEVEL = 0.5f;
    var betterThanGuessing = items.Count(item => item.Confidence > GUESSING_LEVEL);

    // With fewer than three candidates, we use the small, fixed window:
    if (betterThanGuessing < 3)
        return new(1, 2, 3, GUESSING_LEVEL);

    // The top 10% of the items that are better than guessing:
    var topTenPercent = (int) MathF.Floor(betterThanGuessing * 0.1f);

    // These 10% are at most ten items, so the window is built around
    // half of the candidates instead:
    if (topTenPercent <= 10)
    {
        var half = (int) MathF.Floor(betterThanGuessing * 0.5f);
        var remaining = betterThanGuessing - topTenPercent;

        // NOTE(review): remaining never exceeds betterThanGuessing, so this
        // maximum always evaluates to betterThanGuessing. Please confirm
        // whether Math.Min was intended here.
        var upperBound = Math.Max(betterThanGuessing, remaining);
        return new(half, half + 1, upperBound, GUESSING_LEVEL);
    }

    // Otherwise, the window is sized around the top 10% and capped by numMaximumItems:
    const int MIN_NUM_ITEMS = 3;
    var lowerWindowBound = Math.Min(Math.Max(MIN_NUM_ITEMS + 1, topTenPercent), numMaximumItems - 1);
    var minimumItems = Math.Max(MIN_NUM_ITEMS, lowerWindowBound - 3);
    var windowSize = (int)MathF.Max(MathF.Floor(topTenPercent * 0.1f), MathF.Min(10, topTenPercent));
    var upperWindowBound = Math.Min(numMaximumItems, topTenPercent + windowSize);

    return new(minimumItems, lowerWindowBound, upperWindowBound, GUESSING_LEVEL);
}
|
||||
|
||||
/// <summary>
/// Determine the optimal confidence threshold for a list of items
/// in order to match a target window of number of items.
/// </summary>
/// <remarks>
/// Candidate thresholds are probed in equal steps, starting at the lower bound
/// (the larger of the smallest confidence and the window's minimum threshold)
/// and moving towards the highest confidence. The search stops as soon as the
/// number of matching items lies inside the target window. When a candidate
/// leaves no more than the window's minimum number of items, the previously
/// probed threshold is used instead (0f when this happens at the first step).
/// When all steps are used up, the last probed threshold is returned.
/// </remarks>
/// <param name="items">The list of confidence items to analyze.</param>
/// <param name="targetWindow">The target window for the number of items.</param>
/// <param name="maxSteps">The maximum number of steps to search for the threshold.</param>
/// <typeparam name="T">The type of items in the list.</typeparam>
/// <returns>The confidence threshold. 0f is returned when the target window is invalid, when the list is empty, or when the confidence values are too close to each other.</returns>
public static float GetConfidenceThreshold<T>(this IReadOnlyList<T> items, TargetWindow targetWindow, int maxSteps = 10) where T : IConfidence
{
    if(!targetWindow.IsValid())
    {
        var logger = Program.SERVICE_PROVIDER.GetService<ILogger<IConfidence>>()!;
        logger.LogWarning("The target window is invalid. Returning 0f as threshold.");
        return 0f;
    }

    // Without items there is nothing to filter. Min() and Max() would
    // throw for an empty sequence, so we return the neutral threshold:
    if(items.Count == 0)
        return 0f;

    var confidenceValues = items.Select(x => x.Confidence).ToList();
    var minConfidence = confidenceValues.Min();
    var lowerBound = MathF.Max(minConfidence, targetWindow.MinThreshold);
    var upperBound = confidenceValues.Max();

    //
    // We search for a threshold so that the number of matching
    // items is inside the target window. When not possible, we
    // take all items (e.g., threshold = 0f; depends on the used
    // window strategy)
    //
    var threshold = 0.0f;

    // Check the case where the confidence values are too close:
    if (upperBound - minConfidence >= 0.01)
    {
        var previousThreshold = threshold;
        for (var i = 0; i < maxSteps; i++)
        {
            threshold = lowerBound + (upperBound - lowerBound) * i / maxSteps;
            var numMatches = items.Count(x => x.Confidence >= threshold);

            // Too few items are left: go back to the previous threshold.
            if (numMatches <= targetWindow.NumMinItems)
            {
                threshold = previousThreshold;
                break;
            }

            if (targetWindow.InsideWindow(numMatches))
                break;

            previousThreshold = threshold;
        }
    }
    else
    {
        var logger = Program.SERVICE_PROVIDER.GetService<ILogger<IConfidence>>()!;
        logger.LogWarning("The confidence values are too close. Returning 0f as threshold.");
    }

    return threshold;
}
|
||||
|
@ -71,7 +71,8 @@ public class AgenticSrcSelWithDynHeur : IDataSourceSelectionProcess
|
||||
if (aiSelectedDataSources.Count > 3)
|
||||
{
|
||||
// We have more than 3 data sources. Let's filter by confidence:
|
||||
var threshold = aiSelectedDataSources.GetConfidenceThreshold();
|
||||
var targetWindow = aiSelectedDataSources.DetermineTargetWindow(TargetWindowStrategy.A_FEW_GOOD_ONES);
|
||||
var threshold = aiSelectedDataSources.GetConfidenceThreshold(targetWindow);
|
||||
|
||||
//
|
||||
// Filter the data sources by the threshold:
|
||||
|
38
app/MindWork AI Studio/Tools/TargetWindow.cs
Normal file
38
app/MindWork AI Studio/Tools/TargetWindow.cs
Normal file
@ -0,0 +1,38 @@
|
||||
namespace AIStudio.Tools;
|
||||
|
||||
/// <summary>
/// Represents a target window for the number of items to match a threshold.
/// </summary>
/// <param name="NumMinItems">The minimum number of items to match the threshold. Should be at least one and less than TargetWindowMin.</param>
/// <param name="TargetWindowMin">The minimum number of items in the target window. Should be at least 2 and more than NumMinItems.</param>
/// <param name="TargetWindowMax">The maximum number of items in the target window.</param>
/// <param name="MinThreshold">The minimum threshold that belongs to the window. Must be a number between 0 and 1 (inclusive) for the window to be valid.</param>
public readonly record struct TargetWindow(int NumMinItems, int TargetWindowMin, int TargetWindowMax, float MinThreshold)
{
    /// <summary>
    /// Determines if the target window is valid.
    /// </summary>
    /// <remarks>
    /// A window is rejected when NumMinItems is less than one, when TargetWindowMin
    /// is less than NumMinItems, when TargetWindowMax is less than TargetWindowMin,
    /// or when MinThreshold is NaN or outside the range from 0 to 1.
    /// </remarks>
    /// <returns>True when the target window is valid; otherwise, false.</returns>
    public bool IsValid()
    {
        if(this.NumMinItems < 1)
            return false;

        if(this.TargetWindowMin < this.NumMinItems)
            return false;

        if(this.TargetWindowMax < this.TargetWindowMin)
            return false;

        // NaN is neither less than 0 nor greater than 1, so the range
        // check alone would accept it. We reject it explicitly:
        if(float.IsNaN(this.MinThreshold))
            return false;

        if(this.MinThreshold is < 0f or > 1f)
            return false;

        return true;
    }

    /// <summary>
    /// Determines if the number of items is inside the target window.
    /// </summary>
    /// <param name="numItems">The number of items to check.</param>
    /// <returns>True when the number of items is inside the target window (both bounds inclusive); otherwise, false.</returns>
    public bool InsideWindow(int numItems) => numItems >= this.TargetWindowMin && numItems <= this.TargetWindowMax;
}
|
19
app/MindWork AI Studio/Tools/TargetWindowStrategy.cs
Normal file
19
app/MindWork AI Studio/Tools/TargetWindowStrategy.cs
Normal file
@ -0,0 +1,19 @@
|
||||
namespace AIStudio.Tools;
|
||||
|
||||
/// <summary>
/// The available strategies for determining a target window.
/// </summary>
public enum TargetWindowStrategy
{
    /// <summary>
    /// No target window strategy is applied, which effectively returns all items.
    /// </summary>
    NONE = 0,

    /// <summary>
    /// Aims for two to three items, but at least one.
    /// </summary>
    A_FEW_GOOD_ONES = 1,

    /// <summary>
    /// Aims for the top 10% of the items that are better than guessing,
    /// i.e., items with a confidence greater than 0.5f.
    /// </summary>
    TOP10_BETTER_THAN_GUESSING = 2,
}
|
Loading…
Reference in New Issue
Block a user