Removed the MaxTokens parameter because vLLM doesn't support setting the value to -1.

Peer Schütt 2025-07-24 17:23:30 +02:00
parent 6116c03f7c
commit ca267cf174
6 changed files with 10 additions and 8 deletions

View File

@@ -164,7 +164,7 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId
 //
 // We cannot load the API key for self-hosted providers:
 //
-if (this.DataLLMProvider is LLMProviders.SELF_HOSTED && this.DataHost is not Host.OLLAMA)
+if (this.DataLLMProvider is LLMProviders.SELF_HOSTED && this.DataHost is not Host.OLLAMA && this.DataHost is not Host.V_LLM)
 {
     await this.ReloadModels();
     await base.OnInitializedAsync();

View File

@@ -285,7 +285,7 @@ public static class LLMProvidersExtensions
 LLMProviders.GWDG => true,
 LLMProviders.HUGGINGFACE => true,
-LLMProviders.SELF_HOSTED => host is Host.OLLAMA,
+LLMProviders.SELF_HOSTED => host is (Host.OLLAMA or Host.V_LLM),
 _ => false,
 };
@@ -322,6 +322,7 @@ public static class LLMProvidersExtensions
 case Host.OLLAMA:
 case Host.LM_STUDIO:
+case Host.V_LLM:
     return true;
 }
 }

View File

@@ -6,11 +6,8 @@ namespace AIStudio.Provider.SelfHosted;
 /// <param name="Model">Which model to use for chat completion.</param>
 /// <param name="Messages">The chat messages.</param>
 /// <param name="Stream">Whether to stream the chat completion.</param>
-/// <param name="MaxTokens">The maximum number of tokens to generate.</param>
 public readonly record struct ChatRequest(
     string Model,
     IList<Message> Messages,
-    bool Stream,
-    int MaxTokens
+    bool Stream
 );
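
For illustration only (not part of the commit): a minimal sketch of how the trimmed ChatRequest serializes, assuming the options behind JSON_SERIALIZER_OPTIONS map the properties to the lower-case keys that OpenAI-compatible servers such as vLLM expect. With MaxTokens gone, the payload simply omits max_tokens, so the server applies its own default limit instead of being asked for -1.

// Minimal sketch; the Message type and the real serializer options come from AIStudio and are not shown here.
using System.Text.Json;

var request = new ChatRequest(
    Model: "my-model",   // placeholder model name
    Messages: [],        // the real code fills this from the chat thread
    Stream: true);

var json = JsonSerializer.Serialize(request, new JsonSerializerOptions
{
    PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
});

// json == {"model":"my-model","messages":[],"stream":true}; no max_tokens key is sent.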

View File

@@ -7,4 +7,5 @@ public enum Host
 LM_STUDIO,
 LLAMACPP,
 OLLAMA,
+V_LLM,
 }

View File

@@ -9,6 +9,7 @@ public static class HostExtensions
 Host.LM_STUDIO => "LM Studio",
 Host.LLAMACPP => "llama.cpp",
 Host.OLLAMA => "ollama",
+Host.V_LLM => "vLLM",
 _ => "Unknown",
 };
@@ -29,6 +30,7 @@ public static class HostExtensions
 {
 case Host.LM_STUDIO:
 case Host.OLLAMA:
+case Host.V_LLM:
     return true;
 default:

View File

@@ -58,8 +58,7 @@ public sealed class ProviderSelfHosted(ILogger logger, Host host, string hostnam
 }).ToList()],
 // Right now, we only support streaming completions:
-Stream = true,
-MaxTokens = -1,
+Stream = true
 }, JSON_SERIALIZER_OPTIONS);
 async Task<HttpRequestMessage> RequestBuilder()
@@ -101,6 +100,7 @@ public sealed class ProviderSelfHosted(ILogger logger, Host host, string hostnam
 case Host.LM_STUDIO:
 case Host.OLLAMA:
+case Host.V_LLM:
     return await this.LoadModels(["embed"], [], token, apiKeyProvisional);
 }
@@ -127,6 +127,7 @@ public sealed class ProviderSelfHosted(ILogger logger, Host host, string hostnam
 {
 case Host.LM_STUDIO:
 case Host.OLLAMA:
+case Host.V_LLM:
     return await this.LoadModels([], ["embed"], token, apiKeyProvisional);
 }
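
For context, a hypothetical sketch (not AIStudio's actual LoadModels implementation): vLLM, LM Studio and Ollama all expose an OpenAI-compatible GET /v1/models endpoint, which is why the new Host.V_LLM cases can reuse the same include/exclude filtering, with "embed" acting as an include pattern when listing embedding models and as an exclude pattern when listing chat models. The helper name and response types below are illustrative assumptions.

// Hypothetical sketch only; the real LoadModels lives in ProviderSelfHosted and may differ.
using System.Net.Http.Json;   // the remaining namespaces come from .NET's implicit usings

internal sealed record ModelsResponse(List<ModelEntry> Data);
internal sealed record ModelEntry(string Id);

internal static class ModelListing
{
    public static async Task<IReadOnlyList<string>> LoadModelsAsync(
        HttpClient http, string[] include, string[] exclude, CancellationToken token)
    {
        // Assumes http.BaseAddress points at the self-hosted server; OpenAI-compatible
        // servers (vLLM, LM Studio, Ollama) list their models under v1/models:
        var response = await http.GetFromJsonAsync<ModelsResponse>("v1/models", token)
                       ?? new ModelsResponse([]);

        return response.Data
            .Select(m => m.Id)
            .Where(id => include.Length == 0 || include.Any(p => id.Contains(p, StringComparison.OrdinalIgnoreCase)))
            .Where(id => !exclude.Any(p => id.Contains(p, StringComparison.OrdinalIgnoreCase)))
            .ToList();
    }
}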