From d87342869d30e8b3f4b8bc248f2c20604b8507b4 Mon Sep 17 00:00:00 2001 From: Thorsten Sommer Date: Thu, 11 Jun 2026 15:22:32 +0200 Subject: [PATCH] Enhanced llama.cpp support for loading available models --- .../Dialogs/ProviderDialog.razor | 2 +- .../Dialogs/ProviderDialog.razor.cs | 36 +++++- .../Provider/LLMProvidersExtensions.cs | 5 +- app/MindWork AI Studio/Provider/Model.cs | 2 +- .../Provider/SelfHosted/ModelsResponse.cs | 4 +- .../Provider/SelfHosted/ProviderSelfHosted.cs | 103 ++++++++++++++++-- .../Tools/Validation/ProviderValidation.cs | 10 +- .../wwwroot/changelog/v26.6.1.md | 1 + 8 files changed, 142 insertions(+), 21 deletions(-) diff --git a/app/MindWork AI Studio/Dialogs/ProviderDialog.razor b/app/MindWork AI Studio/Dialogs/ProviderDialog.razor index 4c09da2f..64299132 100644 --- a/app/MindWork AI Studio/Dialogs/ProviderDialog.razor +++ b/app/MindWork AI Studio/Dialogs/ProviderDialog.razor @@ -71,7 +71,7 @@ @* ReSharper restore Asp.Entity *@ } - @if (!this.DataLLMProvider.IsLLMModelSelectionHidden(this.DataHost)) + @if (!this.IsLLMModelSelectionHidden) { diff --git a/app/MindWork AI Studio/Dialogs/ProviderDialog.razor.cs b/app/MindWork AI Studio/Dialogs/ProviderDialog.razor.cs index 0e395324..36600e65 100644 --- a/app/MindWork AI Studio/Dialogs/ProviderDialog.razor.cs +++ b/app/MindWork AI Studio/Dialogs/ProviderDialog.razor.cs @@ -104,6 +104,7 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId private string dataAPIKeyStorageIssue = string.Empty; private string dataEditingPreviousInstanceName = string.Empty; private string dataLoadingModelsIssue = string.Empty; + private bool usesLegacySystemModelFallback; private bool showExpertSettings; // We get the form reference from Blazor code to validate it manually: @@ -123,6 +124,7 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId GetUsedInstanceNames = () => this.UsedInstanceNames, GetHost = () => this.DataHost, IsModelProvidedManually = () => this.DataLLMProvider.IsLLMModelProvidedManually(), + IsModelSelectionHidden = () => this.IsLLMModelSelectionHidden, }; } @@ -132,9 +134,9 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId // Determine the model based on the provider and host configuration: Model model; - if (this.DataLLMProvider.IsLLMModelSelectionHidden(this.DataHost)) + if (this.IsLLMModelSelectionHidden) { - // Use system model placeholder for hosts that don't support model selection (e.g., llama.cpp): + // Use system model placeholder for legacy hosts that don't support model selection: model = Model.SYSTEM_MODEL; } else if (this.DataLLMProvider is LLMProviders.FIREWORKS or LLMProviders.HUGGINGFACE) @@ -300,6 +302,7 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId this.dataManuallyModel = string.Empty; this.availableModels.Clear(); this.dataLoadingModelsIssue = string.Empty; + this.usesLegacySystemModelFallback = false; } private async Task ReloadModels() @@ -321,6 +324,7 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId this.availableModels.Clear(); this.availableModels.AddRange(orderedModels); + this.UpdateModelSelectionAfterLoading(); } catch (Exception e) { @@ -334,6 +338,34 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId LLMProviders.SELF_HOSTED => T("(Optional) API Key"), _ => T("API Key"), }; + + private bool IsLLMModelSelectionHidden => this.DataLLMProvider.IsLLMModelSelectionHidden(this.DataHost) || + this.DataLLMProvider is LLMProviders.SELF_HOSTED && + this.DataHost is Host.LLAMA_CPP && + this.usesLegacySystemModelFallback; + + private void UpdateModelSelectionAfterLoading() + { + if (this.DataLLMProvider is not LLMProviders.SELF_HOSTED || this.DataHost is not Host.LLAMA_CPP) + return; + + this.usesLegacySystemModelFallback = this.availableModels.Count is 1 && this.availableModels[0].IsSystemModel; + if (this.usesLegacySystemModelFallback) + { + this.DataModel = Model.SYSTEM_MODEL; + return; + } + + var availableModel = this.availableModels.FirstOrDefault(model => + string.Equals(model.Id, this.DataModel.Id, StringComparison.OrdinalIgnoreCase)); + if (availableModel != default) + { + this.DataModel = availableModel; + return; + } + + this.DataModel = this.availableModels.Count is 1 ? this.availableModels[0] : default; + } private void ToggleExpertSettings() => this.showExpertSettings = !this.showExpertSettings; diff --git a/app/MindWork AI Studio/Provider/LLMProvidersExtensions.cs b/app/MindWork AI Studio/Provider/LLMProvidersExtensions.cs index e71cef95..a6867e0f 100644 --- a/app/MindWork AI Studio/Provider/LLMProvidersExtensions.cs +++ b/app/MindWork AI Studio/Provider/LLMProvidersExtensions.cs @@ -329,14 +329,13 @@ public static class LLMProvidersExtensions /// /// Determines if the model selection should be completely hidden for LLM providers. - /// This is the case when the host does not support model selection (e.g., llama.cpp). + /// This is the case when the host does not support model selection. /// /// The provider. /// The host for self-hosted providers. /// True if model selection should be hidden; otherwise, false. public static bool IsLLMModelSelectionHidden(this LLMProviders provider, Host host) => provider switch { - LLMProviders.SELF_HOSTED => host is Host.LLAMA_CPP, _ => false, }; @@ -416,11 +415,11 @@ public static class LLMProvidersExtensions switch (host) { case Host.NONE: - case Host.LLAMA_CPP: case Host.WHISPER_CPP: default: return false; + case Host.LLAMA_CPP: case Host.OLLAMA: case Host.LM_STUDIO: case Host.VLLM: diff --git a/app/MindWork AI Studio/Provider/Model.cs b/app/MindWork AI Studio/Provider/Model.cs index 0cd43395..f0b64539 100644 --- a/app/MindWork AI Studio/Provider/Model.cs +++ b/app/MindWork AI Studio/Provider/Model.cs @@ -23,7 +23,7 @@ public readonly record struct Model(string Id, string? DisplayName) /// /// Checks if this model is the system-configured placeholder. /// - public bool IsSystemModel => this == SYSTEM_MODEL; + public bool IsSystemModel => string.Equals(this.Id, SYSTEM_MODEL_ID, StringComparison.Ordinal); private static string TB(string fallbackEN) => I18N.I.T(fallbackEN, typeof(Model).Namespace, nameof(Model)); diff --git a/app/MindWork AI Studio/Provider/SelfHosted/ModelsResponse.cs b/app/MindWork AI Studio/Provider/SelfHosted/ModelsResponse.cs index 8ea8fb57..fcb23c61 100644 --- a/app/MindWork AI Studio/Provider/SelfHosted/ModelsResponse.cs +++ b/app/MindWork AI Studio/Provider/SelfHosted/ModelsResponse.cs @@ -1,5 +1,5 @@ namespace AIStudio.Provider.SelfHosted; -public readonly record struct ModelsResponse(string Object, Model[] Data); +public readonly record struct ModelsResponse(string? Object, Model[]? Data); -public readonly record struct Model(string Id, string Object, string OwnedBy); \ No newline at end of file +public readonly record struct Model(string Id, string? Object, string? OwnedBy); \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs b/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs index 53ee6db8..af0c56a4 100644 --- a/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs +++ b/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs @@ -1,5 +1,6 @@ using System.Net.Http.Headers; using System.Runtime.CompilerServices; +using System.Text.Json; using AIStudio.Chat; using AIStudio.Provider.OpenAI; @@ -23,14 +24,15 @@ public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvide public override string InstanceName { get; set; } = "Self-hosted"; /// - public override bool HasModelLoadingCapability => host is Host.OLLAMA or Host.LM_STUDIO or Host.VLLM; + public override bool HasModelLoadingCapability => host is Host.OLLAMA or Host.LM_STUDIO or Host.VLLM or Host.LLAMA_CPP; /// public override async IAsyncEnumerable StreamChatCompletion(Provider.Model chatModel, ChatThread chatThread, SettingsManager settingsManager, [EnumeratorCancellation] CancellationToken token = default) { + var effectiveChatModel = await this.ResolveChatModelForRequest(chatModel, token); await foreach (var content in this.StreamOpenAICompatibleChatCompletion( "self-hosted provider", - chatModel, + effectiveChatModel, chatThread, settingsManager, async (systemPrompt, apiParameters) => @@ -40,13 +42,13 @@ public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvide // - LM Studio, vLLM, and llama.cpp use the nested image URL format: { "type": "image_url", "image_url": { "url": "data:..." } } var messages = host switch { - Host.OLLAMA => await chatThread.Blocks.BuildMessagesUsingDirectImageUrlAsync(this.Provider, chatModel), - _ => await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel), + Host.OLLAMA => await chatThread.Blocks.BuildMessagesUsingDirectImageUrlAsync(this.Provider, effectiveChatModel), + _ => await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, effectiveChatModel), }; return new ChatCompletionAPIRequest { - Model = chatModel.Id, + Model = effectiveChatModel.Id, // Build the messages: // - First of all the system prompt @@ -93,9 +95,7 @@ public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvide switch (host) { case Host.LLAMA_CPP: - // Right now, llama.cpp only supports one model. - // There is no API to list the model(s). - return ModelLoadResult.FromModels([ new Provider.Model("as configured by llama.cpp", null) ]); + return await this.LoadLlamaCppTextModels(token, apiKeyProvisional); case Host.LM_STUDIO: case Host.OLLAMA: @@ -188,8 +188,10 @@ public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvide } var lmStudioModelResponse = await lmStudioResponse.Content.ReadFromJsonAsync(token); - return SuccessfulModelLoadResult(lmStudioModelResponse.Data. - Where(model => !ignorePhrases.Any(ignorePhrase => model.Id.Contains(ignorePhrase, StringComparison.InvariantCulture)) && + var models = lmStudioModelResponse.Data ?? []; + return SuccessfulModelLoadResult(models. + Where(model => !string.IsNullOrWhiteSpace(model.Id) && + !ignorePhrases.Any(ignorePhrase => model.Id.Contains(ignorePhrase, StringComparison.InvariantCulture)) && filterPhrases.All( filter => model.Id.Contains(filter, StringComparison.InvariantCulture))) .Select(n => new Provider.Model(n.Id, null))); } @@ -200,4 +202,85 @@ public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvide return FailedModelLoadResult(ModelLoadFailureReason.PROVIDER_UNAVAILABLE, e.Message); } } + + private async Task ResolveChatModelForRequest(Provider.Model chatModel, CancellationToken token) + { + if (host is not Host.LLAMA_CPP || !chatModel.IsSystemModel) + return chatModel; + + var modelLoadResult = await this.LoadLlamaCppTextModels(token); + if (!modelLoadResult.Success) + return chatModel; + + var availableModels = modelLoadResult.Models + .Where(model => !model.IsSystemModel && !string.IsNullOrWhiteSpace(model.Id)) + .ToList(); + + if (availableModels.Count is 1) + return availableModels[0]; + + if (availableModels.Count > 1) + throw new ProviderRequestException( + ProviderRequestFailureReason.NONE, + string.Format( + TB("The llama.cpp provider '{0}' offers multiple models. Please open the provider settings and select the model to use."), + this.InstanceName)); + + return chatModel; + } + + private async Task LoadLlamaCppTextModels(CancellationToken token, string? apiKeyProvisional = null) + { + var secretKey = await this.GetModelLoadingSecretKey(SecretStoreType.LLM_PROVIDER, apiKeyProvisional, true); + + try + { + using var request = new HttpRequestMessage(HttpMethod.Get, "models"); + if (!string.IsNullOrWhiteSpace(secretKey)) + request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", secretKey); + + using var response = await this.HttpClient.SendAsync(request, token); + var responseBody = await response.Content.ReadAsStringAsync(token); + if (!response.IsSuccessStatusCode) + { + if (response.StatusCode is System.Net.HttpStatusCode.NotFound) + return LlamaCppLegacyModelResult(); + + LOGGER.LogError("llama.cpp model loading request failed with status code {ResponseStatusCode} (message = '{ResponseReasonPhrase}', error body = '{ErrorBody}').", response.StatusCode, response.ReasonPhrase, responseBody); + return FailedModelLoadResult(this.GetModelLoadFailureReason(response, responseBody), $"Status={(int)response.StatusCode} {response.ReasonPhrase}; Body='{responseBody}'"); + } + + try + { + var modelResponse = JsonSerializer.Deserialize(responseBody, JSON_SERIALIZER_OPTIONS); + var models = modelResponse.Data? + .Where(model => !string.IsNullOrWhiteSpace(model.Id)) + .Select(model => new Provider.Model(model.Id, null)) + .ToList() ?? []; + + return models.Count is 0 ? LlamaCppLegacyModelResult() : SuccessfulModelLoadResult(models); + } + catch (JsonException e) + { + LOGGER.LogWarning(e, "The llama.cpp model loading response could not be parsed. Falling back to the legacy system-configured model."); + return LlamaCppLegacyModelResult(); + } + } + catch (Exception e) when (this.IsTimeoutException(e, token)) + { + await this.SendTimeoutError("loading the available models"); + LOGGER.LogError(e, "Timed out while loading models from llama.cpp provider '{ProviderInstanceName}'.", this.InstanceName); + return FailedModelLoadResult(ModelLoadFailureReason.PROVIDER_UNAVAILABLE, e.Message); + } + catch (Exception e) + { + LOGGER.LogError(e, "Failed to load models from llama.cpp provider '{ProviderInstanceName}'.", this.InstanceName); + return FailedModelLoadResult(ModelLoadFailureReason.UNKNOWN, e.Message); + } + } + + private static ModelLoadResult LlamaCppLegacyModelResult() + { + return ModelLoadResult.FromModels([ AIStudio.Provider.Model.SYSTEM_MODEL ]); + } } \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/Validation/ProviderValidation.cs b/app/MindWork AI Studio/Tools/Validation/ProviderValidation.cs index bb72feb4..5e98efd8 100644 --- a/app/MindWork AI Studio/Tools/Validation/ProviderValidation.cs +++ b/app/MindWork AI Studio/Tools/Validation/ProviderValidation.cs @@ -22,6 +22,8 @@ public sealed class ProviderValidation public Func IsModelProvidedManually { get; init; } = () => false; + public Func IsModelSelectionHidden { get; init; } = () => false; + public string? ValidatingHostname(string hostname) { if(this.GetProvider() != LLMProviders.SELF_HOSTED) @@ -76,9 +78,13 @@ public sealed class ProviderValidation if (this.GetProvider() is LLMProviders.NONE) return null; - // For self-hosted llama.cpp or whisper.cpp, no model selection needed + // For self-hosted whisper.cpp, no model selection needed // (model is loaded at startup): - if (this.GetProvider() is LLMProviders.SELF_HOSTED && this.GetHost() is Host.LLAMA_CPP or Host.WHISPER_CPP) + if (this.GetProvider() is LLMProviders.SELF_HOSTED && this.GetHost() is Host.WHISPER_CPP) + return null; + + // For legacy hosts without model selection, no selection validation is needed: + if (this.IsModelSelectionHidden()) return null; // For manually entered models, this validation doesn't apply: diff --git a/app/MindWork AI Studio/wwwroot/changelog/v26.6.1.md b/app/MindWork AI Studio/wwwroot/changelog/v26.6.1.md index a367b1a0..3f719b68 100644 --- a/app/MindWork AI Studio/wwwroot/changelog/v26.6.1.md +++ b/app/MindWork AI Studio/wwwroot/changelog/v26.6.1.md @@ -5,6 +5,7 @@ - Added support for reading enterprise policy files from a Flatpak provisioning extension. - Added startup path and Linux package type details to the information page to make support easier. - Added the option to search for chats in all workspaces. +- Improved self-hosted llama.cpp providers by loading available models from the server and supporting servers that offer multiple models. Thanks to the GONICUS team for reporting this issue. - Improved workspaces by highlighting the currently open chat in the workspace view. - Improved workspaces by adding a shortcut to start a new chat directly from each workspace row. - Improved workspaces by allowing new workspaces to be created while moving a chat.