Enhanced llama.cpp support for loading available models

This commit is contained in:
Thorsten Sommer 2026-06-11 15:22:32 +02:00
parent 71ae52753a
commit d87342869d
Signed by untrusted user who does not match committer: tsommer
GPG Key ID: 371BBA77A02C0108
8 changed files with 142 additions and 21 deletions

View File

@ -71,7 +71,7 @@
@* ReSharper restore Asp.Entity *@
}
@if (!this.DataLLMProvider.IsLLMModelSelectionHidden(this.DataHost))
@if (!this.IsLLMModelSelectionHidden)
{
<MudField FullWidth="true" Label="@T("Model selection")" Variant="Variant.Outlined" Class="mb-3">
<MudStack Row="@true" AlignItems="AlignItems.Center" StretchItems="StretchItems.End">

View File

@ -104,6 +104,7 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId
private string dataAPIKeyStorageIssue = string.Empty;
private string dataEditingPreviousInstanceName = string.Empty;
private string dataLoadingModelsIssue = string.Empty;
private bool usesLegacySystemModelFallback;
private bool showExpertSettings;
// We get the form reference from Blazor code to validate it manually:
@ -123,6 +124,7 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId
GetUsedInstanceNames = () => this.UsedInstanceNames,
GetHost = () => this.DataHost,
IsModelProvidedManually = () => this.DataLLMProvider.IsLLMModelProvidedManually(),
IsModelSelectionHidden = () => this.IsLLMModelSelectionHidden,
};
}
@ -132,9 +134,9 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId
// Determine the model based on the provider and host configuration:
Model model;
if (this.DataLLMProvider.IsLLMModelSelectionHidden(this.DataHost))
if (this.IsLLMModelSelectionHidden)
{
// Use system model placeholder for hosts that don't support model selection (e.g., llama.cpp):
// Use system model placeholder for legacy hosts that don't support model selection:
model = Model.SYSTEM_MODEL;
}
else if (this.DataLLMProvider is LLMProviders.FIREWORKS or LLMProviders.HUGGINGFACE)
@ -300,6 +302,7 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId
this.dataManuallyModel = string.Empty;
this.availableModels.Clear();
this.dataLoadingModelsIssue = string.Empty;
this.usesLegacySystemModelFallback = false;
}
private async Task ReloadModels()
@ -321,6 +324,7 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId
this.availableModels.Clear();
this.availableModels.AddRange(orderedModels);
this.UpdateModelSelectionAfterLoading();
}
catch (Exception e)
{
@ -334,6 +338,34 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId
LLMProviders.SELF_HOSTED => T("(Optional) API Key"),
_ => T("API Key"),
};
/// <summary>
/// Indicates whether the model selection UI should be hidden for the current provider and host.
/// True when the provider/host combination reports a hidden selection, or when a
/// self-hosted llama.cpp provider is currently using the legacy system-model fallback.
/// </summary>
private bool IsLLMModelSelectionHidden
{
    get
    {
        // The provider/host combination itself may already hide the selection:
        if (this.DataLLMProvider.IsLLMModelSelectionHidden(this.DataHost))
            return true;

        // Otherwise, hide it only for self-hosted llama.cpp while the legacy fallback is active:
        return this.DataLLMProvider is LLMProviders.SELF_HOSTED
               && this.DataHost is Host.LLAMA_CPP
               && this.usesLegacySystemModelFallback;
    }
}
/// <summary>
/// Aligns the selected model with the freshly loaded list of available models.
/// Only acts for self-hosted llama.cpp providers; every other provider/host is left untouched.
/// </summary>
private void UpdateModelSelectionAfterLoading()
{
    // Nothing to do unless this is a self-hosted llama.cpp provider:
    var isSelfHostedLlamaCpp = this.DataLLMProvider is LLMProviders.SELF_HOSTED && this.DataHost is Host.LLAMA_CPP;
    if (!isSelfHostedLlamaCpp)
        return;

    // A single entry that is the system model placeholder marks the legacy fallback:
    var hasSingleModel = this.availableModels.Count is 1;
    this.usesLegacySystemModelFallback = hasSingleModel && this.availableModels[0].IsSystemModel;
    if (this.usesLegacySystemModelFallback)
    {
        this.DataModel = Model.SYSTEM_MODEL;
        return;
    }

    // Try to keep the current selection by matching its ID case-insensitively:
    var matchingModel = this.availableModels.FirstOrDefault(candidate =>
        string.Equals(candidate.Id, this.DataModel.Id, StringComparison.OrdinalIgnoreCase));

    if (matchingModel != default)
    {
        this.DataModel = matchingModel;
    }
    else if (hasSingleModel)
    {
        // No match, but only one model is offered: select it.
        this.DataModel = this.availableModels[0];
    }
    else
    {
        // No match and no unambiguous choice: reset the selection.
        this.DataModel = default;
    }
}
private void ToggleExpertSettings() => this.showExpertSettings = !this.showExpertSettings;

View File

@ -329,14 +329,13 @@ public static class LLMProvidersExtensions
/// <summary>
/// Determines if the model selection should be completely hidden for LLM providers.
/// This is the case when the host does not support model selection (e.g., llama.cpp).
/// This is the case when the host does not support model selection.
/// </summary>
/// <param name="provider">The provider.</param>
/// <param name="host">The host for self-hosted providers.</param>
/// <returns>True if model selection should be hidden; otherwise, false.</returns>
public static bool IsLLMModelSelectionHidden(this LLMProviders provider, Host host) => provider switch
{
LLMProviders.SELF_HOSTED => host is Host.LLAMA_CPP,
_ => false,
};
@ -416,11 +415,11 @@ public static class LLMProvidersExtensions
switch (host)
{
case Host.NONE:
case Host.LLAMA_CPP:
case Host.WHISPER_CPP:
default:
return false;
case Host.LLAMA_CPP:
case Host.OLLAMA:
case Host.LM_STUDIO:
case Host.VLLM:

View File

@ -23,7 +23,7 @@ public readonly record struct Model(string Id, string? DisplayName)
/// <summary>
/// Checks if this model is the system-configured placeholder.
/// </summary>
public bool IsSystemModel => this == SYSTEM_MODEL;
public bool IsSystemModel => string.Equals(this.Id, SYSTEM_MODEL_ID, StringComparison.Ordinal);
private static string TB(string fallbackEN) => I18N.I.T(fallbackEN, typeof(Model).Namespace, nameof(Model));

View File

@ -1,5 +1,5 @@
namespace AIStudio.Provider.SelfHosted;
public readonly record struct ModelsResponse(string Object, Model[] Data);
public readonly record struct ModelsResponse(string? Object, Model[]? Data);
public readonly record struct Model(string Id, string Object, string OwnedBy);
public readonly record struct Model(string Id, string? Object, string? OwnedBy);

View File

@ -1,5 +1,6 @@
using System.Net.Http.Headers;
using System.Runtime.CompilerServices;
using System.Text.Json;
using AIStudio.Chat;
using AIStudio.Provider.OpenAI;
@ -23,14 +24,15 @@ public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvide
public override string InstanceName { get; set; } = "Self-hosted";
/// <inheritdoc />
public override bool HasModelLoadingCapability => host is Host.OLLAMA or Host.LM_STUDIO or Host.VLLM;
public override bool HasModelLoadingCapability => host is Host.OLLAMA or Host.LM_STUDIO or Host.VLLM or Host.LLAMA_CPP;
/// <inheritdoc />
public override async IAsyncEnumerable<ContentStreamChunk> StreamChatCompletion(Provider.Model chatModel, ChatThread chatThread, SettingsManager settingsManager, [EnumeratorCancellation] CancellationToken token = default)
{
var effectiveChatModel = await this.ResolveChatModelForRequest(chatModel, token);
await foreach (var content in this.StreamOpenAICompatibleChatCompletion<ChatCompletionAPIRequest, ChatCompletionDeltaStreamLine, ChatCompletionAnnotationStreamLine>(
"self-hosted provider",
chatModel,
effectiveChatModel,
chatThread,
settingsManager,
async (systemPrompt, apiParameters) =>
@ -40,13 +42,13 @@ public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvide
// - LM Studio, vLLM, and llama.cpp use the nested image URL format: { "type": "image_url", "image_url": { "url": "data:..." } }
var messages = host switch
{
Host.OLLAMA => await chatThread.Blocks.BuildMessagesUsingDirectImageUrlAsync(this.Provider, chatModel),
_ => await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel),
Host.OLLAMA => await chatThread.Blocks.BuildMessagesUsingDirectImageUrlAsync(this.Provider, effectiveChatModel),
_ => await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, effectiveChatModel),
};
return new ChatCompletionAPIRequest
{
Model = chatModel.Id,
Model = effectiveChatModel.Id,
// Build the messages:
// - First of all the system prompt
@ -93,9 +95,7 @@ public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvide
switch (host)
{
case Host.LLAMA_CPP:
// Right now, llama.cpp only supports one model.
// There is no API to list the model(s).
return ModelLoadResult.FromModels([ new Provider.Model("as configured by llama.cpp", null) ]);
return await this.LoadLlamaCppTextModels(token, apiKeyProvisional);
case Host.LM_STUDIO:
case Host.OLLAMA:
@ -188,8 +188,10 @@ public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvide
}
var lmStudioModelResponse = await lmStudioResponse.Content.ReadFromJsonAsync<ModelsResponse>(token);
return SuccessfulModelLoadResult(lmStudioModelResponse.Data.
Where(model => !ignorePhrases.Any(ignorePhrase => model.Id.Contains(ignorePhrase, StringComparison.InvariantCulture)) &&
var models = lmStudioModelResponse.Data ?? [];
return SuccessfulModelLoadResult(models.
Where(model => !string.IsNullOrWhiteSpace(model.Id) &&
!ignorePhrases.Any(ignorePhrase => model.Id.Contains(ignorePhrase, StringComparison.InvariantCulture)) &&
filterPhrases.All( filter => model.Id.Contains(filter, StringComparison.InvariantCulture)))
.Select(n => new Provider.Model(n.Id, null)));
}
@ -200,4 +202,85 @@ public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvide
return FailedModelLoadResult(ModelLoadFailureReason.PROVIDER_UNAVAILABLE, e.Message);
}
}
/// <summary>
/// Resolves the model to send with a chat request.
/// For llama.cpp hosts configured with the system model placeholder, the available
/// models are loaded and, if exactly one concrete model exists, that model is used.
/// </summary>
/// <param name="chatModel">The model configured for the chat request.</param>
/// <param name="token">The cancellation token passed on to the model loading call.</param>
/// <returns>
/// The single concrete model reported by llama.cpp, or <paramref name="chatModel"/> unchanged when
/// the host is not llama.cpp, the model is not the system placeholder, loading failed, or no
/// concrete model was reported.
/// </returns>
/// <exception cref="ProviderRequestException">Thrown when more than one concrete model is available.</exception>
private async Task<Provider.Model> ResolveChatModelForRequest(Provider.Model chatModel, CancellationToken token)
{
    // Only the llama.cpp system model placeholder needs to be resolved:
    var needsResolution = host is Host.LLAMA_CPP && chatModel.IsSystemModel;
    if (!needsResolution)
        return chatModel;

    var loadResult = await this.LoadLlamaCppTextModels(token);
    if (!loadResult.Success)
        return chatModel;

    // Ignore the placeholder itself and entries without a usable ID:
    var concreteModels = loadResult.Models
        .Where(candidate => !candidate.IsSystemModel && !string.IsNullOrWhiteSpace(candidate.Id))
        .ToList();

    return concreteModels.Count switch
    {
        // Exactly one concrete model: use it.
        1 => concreteModels[0],

        // Several models: the choice is ambiguous, so the user has to pick one.
        > 1 => throw new ProviderRequestException(
            ProviderRequestFailureReason.NONE,
            string.Format(
                TB("The llama.cpp provider '{0}' offers multiple models. Please open the provider settings and select the model to use."),
                this.InstanceName)),

        // No concrete model: keep the configured one.
        _ => chatModel,
    };
}
/// <summary>
/// Loads the models offered by a llama.cpp server by sending a GET request to "models".
/// </summary>
/// <param name="token">The cancellation token for the HTTP request and for reading the response body.</param>
/// <param name="apiKeyProvisional">An optional provisional API key passed on to the secret key lookup.</param>
/// <returns>
/// A successful result with the listed models; the legacy system-model result when the server
/// answers 404, reports no usable model, or returns a body that cannot be parsed as JSON;
/// otherwise a failed result describing the problem.
/// </returns>
private async Task<ModelLoadResult> LoadLlamaCppTextModels(CancellationToken token, string? apiKeyProvisional = null)
{
    var apiKey = await this.GetModelLoadingSecretKey(SecretStoreType.LLM_PROVIDER, apiKeyProvisional, true);

    try
    {
        using var modelsRequest = new HttpRequestMessage(HttpMethod.Get, "models");

        // Authenticate only when a key is actually available:
        if (!string.IsNullOrWhiteSpace(apiKey))
            modelsRequest.Headers.Authorization = new AuthenticationHeaderValue("Bearer", apiKey);

        using var modelsResponse = await this.HttpClient.SendAsync(modelsRequest, token);
        var body = await modelsResponse.Content.ReadAsStringAsync(token);

        // A 404 is mapped to the legacy system-configured model instead of an error:
        if (modelsResponse.StatusCode is System.Net.HttpStatusCode.NotFound)
            return LlamaCppLegacyModelResult();

        if (!modelsResponse.IsSuccessStatusCode)
        {
            LOGGER.LogError("llama.cpp model loading request failed with status code {ResponseStatusCode} (message = '{ResponseReasonPhrase}', error body = '{ErrorBody}').", modelsResponse.StatusCode, modelsResponse.ReasonPhrase, body);
            return FailedModelLoadResult(this.GetModelLoadFailureReason(modelsResponse, body), $"Status={(int)modelsResponse.StatusCode} {modelsResponse.ReasonPhrase}; Body='{body}'");
        }

        try
        {
            var parsedResponse = JsonSerializer.Deserialize<ModelsResponse>(body, JSON_SERIALIZER_OPTIONS);

            // Treat a missing data array like an empty one and drop entries without an ID:
            var listedModels = (parsedResponse.Data ?? [])
                .Where(entry => !string.IsNullOrWhiteSpace(entry.Id))
                .Select(entry => new Provider.Model(entry.Id, null))
                .ToList();

            if (listedModels.Count is 0)
                return LlamaCppLegacyModelResult();

            return SuccessfulModelLoadResult(listedModels);
        }
        catch (JsonException e)
        {
            LOGGER.LogWarning(e, "The llama.cpp model loading response could not be parsed. Falling back to the legacy system-configured model.");
            return LlamaCppLegacyModelResult();
        }
    }
    catch (Exception e) when (this.IsTimeoutException(e, token))
    {
        await this.SendTimeoutError("loading the available models");
        LOGGER.LogError(e, "Timed out while loading models from llama.cpp provider '{ProviderInstanceName}'.", this.InstanceName);
        return FailedModelLoadResult(ModelLoadFailureReason.PROVIDER_UNAVAILABLE, e.Message);
    }
    catch (Exception e)
    {
        LOGGER.LogError(e, "Failed to load models from llama.cpp provider '{ProviderInstanceName}'.", this.InstanceName);
        return FailedModelLoadResult(ModelLoadFailureReason.UNKNOWN, e.Message);
    }
}
/// <summary>
/// Creates the model load result that contains only the system model placeholder.
/// </summary>
/// <returns>A result built from a single-entry list holding <c>Model.SYSTEM_MODEL</c>.</returns>
private static ModelLoadResult LlamaCppLegacyModelResult() =>
    ModelLoadResult.FromModels([ AIStudio.Provider.Model.SYSTEM_MODEL ]);
}

View File

@ -22,6 +22,8 @@ public sealed class ProviderValidation
public Func<bool> IsModelProvidedManually { get; init; } = () => false;
public Func<bool> IsModelSelectionHidden { get; init; } = () => false;
public string? ValidatingHostname(string hostname)
{
if(this.GetProvider() != LLMProviders.SELF_HOSTED)
@ -76,9 +78,13 @@ public sealed class ProviderValidation
if (this.GetProvider() is LLMProviders.NONE)
return null;
// For self-hosted llama.cpp or whisper.cpp, no model selection needed
// For self-hosted whisper.cpp, no model selection needed
// (model is loaded at startup):
if (this.GetProvider() is LLMProviders.SELF_HOSTED && this.GetHost() is Host.LLAMA_CPP or Host.WHISPER_CPP)
if (this.GetProvider() is LLMProviders.SELF_HOSTED && this.GetHost() is Host.WHISPER_CPP)
return null;
// For legacy hosts without model selection, no selection validation is needed:
if (this.IsModelSelectionHidden())
return null;
// For manually entered models, this validation doesn't apply:

View File

@ -5,6 +5,7 @@
- Added support for reading enterprise policy files from a Flatpak provisioning extension.
- Added startup path and Linux package type details to the information page to make support easier.
- Added the option to search for chats in all workspaces.
- Improved self-hosted llama.cpp providers by loading available models from the server and supporting servers that offer multiple models. Thanks to the GONICUS team for reporting this issue.
- Improved workspaces by highlighting the currently open chat in the workspace view.
- Improved workspaces by adding a shortcut to start a new chat directly from each workspace row.
- Improved workspaces by allowing new workspaces to be created while moving a chat.