Removed the MaxTokens parameter because vLLM doesn't support setting the value to -1.

Peer Schütt 2025-07-24 17:23:30 +02:00
parent 6116c03f7c
commit ca267cf174
6 changed files with 10 additions and 8 deletions
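
For context, the payload change can be sketched as follows. This is a minimal, hypothetical example assuming System.Text.Json with camel-case property naming and a stand-in Message type; the repository's actual Message shape and JSON_SERIALIZER_OPTIONS may differ. The point is simply that the serialized chat request no longer carries any token-limit field, so a vLLM backend applies its own default instead of rejecting the value -1.

using System;
using System.Collections.Generic;
using System.Text.Json;

// Stand-in for the repository's Message type; its real shape is not shown in this diff.
public readonly record struct Message(string Role, string Content);

// The request record as it looks after this commit: no MaxTokens member anymore.
public readonly record struct ChatRequest(
    string Model,
    IList<Message> Messages,
    bool Stream
);

public static class ChatRequestDemo
{
    public static void Main()
    {
        // Hypothetical serializer settings; the provider uses its own JSON_SERIALIZER_OPTIONS.
        var options = new JsonSerializerOptions { PropertyNamingPolicy = JsonNamingPolicy.CamelCase };

        var request = new ChatRequest("example-model", [new Message("user", "Hello!")], Stream: true);

        // Prints: {"model":"example-model","messages":[{"role":"user","content":"Hello!"}],"stream":true}
        // There is no max_tokens field at all, so the server-side default limit applies.
        Console.WriteLine(JsonSerializer.Serialize(request, options));
    }
}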


@@ -164,7 +164,7 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId
//
// We cannot load the API key for self-hosted providers:
//
- if (this.DataLLMProvider is LLMProviders.SELF_HOSTED && this.DataHost is not Host.OLLAMA)
+ if (this.DataLLMProvider is LLMProviders.SELF_HOSTED && this.DataHost is not Host.OLLAMA && this.DataHost is not Host.V_LLM)
{
await this.ReloadModels();
await base.OnInitializedAsync();


@@ -285,7 +285,7 @@ public static class LLMProvidersExtensions
LLMProviders.GWDG => true,
LLMProviders.HUGGINGFACE => true,
- LLMProviders.SELF_HOSTED => host is Host.OLLAMA,
+ LLMProviders.SELF_HOSTED => host is (Host.OLLAMA or Host.V_LLM),
_ => false,
};
@@ -322,6 +322,7 @@ public static class LLMProvidersExtensions
case Host.OLLAMA:
case Host.LM_STUDIO:
+ case Host.V_LLM:
return true;
}
}


@@ -6,11 +6,8 @@ namespace AIStudio.Provider.SelfHosted;
/// <param name="Model">Which model to use for chat completion.</param>
/// <param name="Messages">The chat messages.</param>
/// <param name="Stream">Whether to stream the chat completion.</param>
/// <param name="MaxTokens">The maximum number of tokens to generate.</param>
public readonly record struct ChatRequest(
string Model,
IList<Message> Messages,
- bool Stream,
- int MaxTokens
+ bool Stream
);


@@ -7,4 +7,5 @@ public enum Host
LM_STUDIO,
LLAMACPP,
OLLAMA,
+ V_LLM,
}


@@ -9,6 +9,7 @@ public static class HostExtensions
Host.LM_STUDIO => "LM Studio",
Host.LLAMACPP => "llama.cpp",
Host.OLLAMA => "ollama",
+ Host.V_LLM => "vLLM",
_ => "Unknown",
};
@@ -29,6 +30,7 @@ public static class HostExtensions
{
case Host.LM_STUDIO:
case Host.OLLAMA:
+ case Host.V_LLM:
return true;
default:


@@ -58,8 +58,7 @@ public sealed class ProviderSelfHosted(ILogger logger, Host host, string hostnam
}).ToList()],
// Right now, we only support streaming completions:
- Stream = true,
- MaxTokens = -1,
+ Stream = true
}, JSON_SERIALIZER_OPTIONS);
async Task<HttpRequestMessage> RequestBuilder()
@@ -101,6 +100,7 @@ public sealed class ProviderSelfHosted(ILogger logger, Host host, string hostnam
case Host.LM_STUDIO:
case Host.OLLAMA:
+ case Host.V_LLM:
return await this.LoadModels(["embed"], [], token, apiKeyProvisional);
}
@@ -127,6 +127,7 @@ public sealed class ProviderSelfHosted(ILogger logger, Host host, string hostnam
{
case Host.LM_STUDIO:
case Host.OLLAMA:
+ case Host.V_LLM:
return await this.LoadModels([], ["embed"], token, apiKeyProvisional);
}
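
As a quick end-to-end check, the trimmed payload can be posted by hand to a locally running vLLM server through its OpenAI-compatible endpoint. This is a rough sketch, not AI Studio's actual request pipeline: the URL, port, and model name are placeholders, no API key is sent, and streaming is turned off here only so a single JSON response can be printed (the provider itself always sends Stream = true).

using System;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;

public static class VllmSmokeTest
{
    public static async Task Main()
    {
        using var client = new HttpClient();

        // Placeholder endpoint; vLLM's OpenAI-compatible server listens on port 8000 by default.
        const string url = "http://localhost:8000/v1/chat/completions";

        // Hand-written payload mirroring the trimmed ChatRequest: no max_tokens field is sent.
        const string json = """
        {
          "model": "example-model",
          "messages": [{ "role": "user", "content": "Hello!" }],
          "stream": false
        }
        """;

        using var content = new StringContent(json, Encoding.UTF8, "application/json");
        using var response = await client.PostAsync(url, content);

        Console.WriteLine((int)response.StatusCode);
        Console.WriteLine(await response.Content.ReadAsStringAsync());
    }
}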