2024-10-07 11:26:25 +00:00
using System.Net.Http.Headers ;
2024-07-03 18:31:04 +00:00
using System.Runtime.CompilerServices ;
using AIStudio.Chat ;
using AIStudio.Provider.OpenAI ;
2025-01-02 13:50:54 +00:00
using AIStudio.Settings ;
2026-01-09 11:45:21 +00:00
using AIStudio.Tools.PluginSystem ;
2024-07-03 18:31:04 +00:00
namespace AIStudio.Provider.SelfHosted ;
2025-12-30 17:30:32 +00:00
public sealed class ProviderSelfHosted ( Host host , string hostname ) : BaseProvider ( LLMProviders . SELF_HOSTED , $"{hostname}{host.BaseURL()}" , LOGGER )
2024-07-03 18:31:04 +00:00
{
2025-09-03 19:25:17 +00:00
/// <summary>
/// Logger for this provider type, created once from the application-wide logger factory.
/// </summary>
private static readonly ILogger<ProviderSelfHosted> LOGGER = Program.LOGGER_FACTORY.CreateLogger<ProviderSelfHosted>();

/// <summary>
/// Forwards the given English fallback text to <c>I18N.I.T</c>, together with the
/// namespace and the name of this type (presumably the lookup scope for the translation).
/// </summary>
/// <param name="fallbackEN">The English fallback text.</param>
/// <returns>The string returned by <c>I18N.I.T</c>.</returns>
private static string TB(string fallbackEN)
{
    var owningNamespace = typeof(ProviderSelfHosted).Namespace;
    return I18N.I.T(fallbackEN, owningNamespace, nameof(ProviderSelfHosted));
}
2025-09-03 19:25:17 +00:00
2024-07-03 18:31:04 +00:00
#region Implementation of IProvider
2026-04-16 09:24:22 +00:00
/// <inheritdoc />
public override string Id
{
    get { return LLMProviders.SELF_HOSTED.ToName(); }
}

/// <inheritdoc />
public override string InstanceName { get; set; } = "Self-hosted";

/// <inheritdoc />
public override bool HasModelLoadingCapability
{
    get
    {
        // Only these three hosts report the capability; every other host yields false.
        return host switch
        {
            Host.OLLAMA => true,
            Host.LM_STUDIO => true,
            Host.VLLM => true,
            _ => false,
        };
    }
}
2024-07-03 18:31:04 +00:00
2024-09-01 18:10:03 +00:00
/// <inheritdoc />
public override async IAsyncEnumerable<ContentStreamChunk> StreamChatCompletion(Provider.Model chatModel, ChatThread chatThread, SettingsManager settingsManager, [EnumeratorCancellation] CancellationToken token = default)
{
    var chunkStream = this.StreamOpenAICompatibleChatCompletion<ChatCompletionAPIRequest, ChatCompletionDeltaStreamLine, ChatCompletionAnnotationStreamLine>(
        "self-hosted provider",
        chatModel,
        chatThread,
        settingsManager,
        async (systemPrompt, apiParameters) =>
        {
            // The image format inside the messages depends on the host:
            // - Ollama uses the direct image URL format: { "type": "image_url", "image_url": "data:..." }
            // - LM Studio, vLLM, and llama.cpp use the nested image URL format: { "type": "image_url", "image_url": { "url": "data:..." } }
            var messages = host is Host.OLLAMA
                ? await chatThread.Blocks.BuildMessagesUsingDirectImageUrlAsync(this.Provider, chatModel)
                : await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel);

            return new ChatCompletionAPIRequest
            {
                Model = chatModel.Id,

                // The system prompt comes first, followed by the non-empty user and AI messages:
                Messages = [systemPrompt, .. messages],

                // Only streaming completions are supported right now:
                Stream = true,
                AdditionalApiParameters = apiParameters
            };
        },
        isTryingSecret: true,
        requestPath: host.ChatURL(),
        token: token);

    // Pass every chunk through to the caller as it arrives.
    await foreach (var chunk in chunkStream)
        yield return chunk;
}
// The method below is an async iterator without any 'await', hence the CS1998 suppression around it.
#pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously
/// <inheritdoc />
2024-12-03 14:24:40 +00:00
public override async IAsyncEnumerable < ImageURL > StreamImageCompletion ( Provider . Model imageModel , string promptPositive , string promptNegative = FilterOperator . String . Empty , ImageURL referenceImageURL = default , [ EnumeratorCancellation ] CancellationToken token = default )
2024-07-03 18:31:04 +00:00
{
// No image generation is performed here: the stream completes without yielding any ImageURL.
// NOTE(review): promptNegative defaults to FilterOperator.String.Empty rather than string.Empty;
// confirm that this is intended and matches the base declaration.
yield break ;
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
2025-05-11 10:51:35 +00:00
2026-01-11 15:02:28 +00:00
/// <inheritdoc />
public override async Task<TranscriptionResult> TranscribeAudioAsync(Provider.Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
{
    // Look up the API key of the transcription provider in "trying" mode.
    var transcriptionSecret = await RUST_SERVICE.GetAPIKey(this, SecretStoreType.TRANSCRIPTION_PROVIDER, isTrying: true);

    // The actual request is delegated to the shared standard transcription routine.
    var transcription = await this.PerformStandardTranscriptionRequest(transcriptionSecret, transcriptionModel, audioFilePath, host, token);
    return transcription;
}
2026-02-20 14:32:54 +00:00
/// <inheritdoc />
public override async Task<IReadOnlyList<IReadOnlyList<float>>> EmbedTextAsync(Provider.Model embeddingModel, SettingsManager settingsManager, CancellationToken token = default, params List<string> texts)
{
    // Look up the API key of the embedding provider in "trying" mode.
    var requestedSecret = await RUST_SERVICE.GetAPIKey(this, SecretStoreType.EMBEDDING_PROVIDER, isTrying: true);

    // The actual request is delegated to the shared standard embedding routine.
    return await this.PerformStandardTextEmbeddingRequest(requestedSecret, embeddingModel, host, token: token, texts: texts);
}
2026-04-14 11:39:11 +00:00
/// <inheritdoc />
public override async Task<ModelLoadResult> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)
{
    try
    {
        switch (host)
        {
            case Host.LLAMA_CPP:
                // Right now, llama.cpp only supports one model.
                // There is no API to list the model(s).
                return ModelLoadResult.FromModels([new Provider.Model("as configured by llama.cpp", null)]);

            case Host.LM_STUDIO:
            case Host.OLLAMA:
            case Host.VLLM:
                // Models whose id contains "embed" are ignored, so that embedding models do not show up as text models.
                return await this.LoadModels(SecretStoreType.LLM_PROVIDER, ["embed"], [], token, apiKeyProvisional);

            default:
                // Any other host offers no text models here.
                return ModelLoadResult.FromModels([]);
        }
    }
    catch (Exception e)
    {
        // Pass the exception itself so that the stack trace is kept, and use a constant message template.
        LOGGER.LogError(e, "Failed to load text models from self-hosted provider: {ErrorMessage}", e.Message);
        return ModelLoadResult.Failure(ModelLoadFailureReason.UNKNOWN, e.Message);
    }
}
/// <inheritdoc />
public override Task<ModelLoadResult> GetImageModels(string? apiKeyProvisional = null, CancellationToken token = default)
    // Always completes synchronously with an empty model list.
    => Task.FromResult(ModelLoadResult.FromModels([]));
2024-12-03 14:24:40 +00:00
2026-04-14 11:39:11 +00:00
/// <inheritdoc />
public override async Task<ModelLoadResult> GetEmbeddingModels(string? apiKeyProvisional = null, CancellationToken token = default)
{
    try
    {
        switch (host)
        {
            case Host.LM_STUDIO:
            case Host.OLLAMA:
            case Host.VLLM:
                // Only models whose id contains "embed" are kept.
                return await this.LoadModels(SecretStoreType.EMBEDDING_PROVIDER, [], ["embed"], token, apiKeyProvisional);

            default:
                // Any other host offers no embedding models here.
                return ModelLoadResult.FromModels([]);
        }
    }
    catch (Exception e)
    {
        // The message names embedding models (it previously said "text models"), and the
        // exception itself is passed along so that the stack trace is kept.
        LOGGER.LogError(e, "Failed to load embedding models from self-hosted provider: {ErrorMessage}", e.Message);
        return ModelLoadResult.Failure(ModelLoadFailureReason.UNKNOWN, e.Message);
    }
}
2025-05-11 10:51:35 +00:00
2026-01-09 11:45:21 +00:00
/// <inheritdoc />
public override async Task<ModelLoadResult> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default)
{
    try
    {
        switch (host)
        {
            case Host.WHISPER_CPP:
                // No model listing is requested for whisper.cpp; a single placeholder entry is returned instead.
                return ModelLoadResult.FromModels(
                [
                    new Provider.Model("loaded-model", TB("Model as configured by whisper.cpp")),
                ]);

            case Host.OLLAMA:
            case Host.VLLM:
                // No ignore or filter phrases: every model reported by the host is returned.
                return await this.LoadModels(SecretStoreType.TRANSCRIPTION_PROVIDER, [], [], token, apiKeyProvisional);

            default:
                return ModelLoadResult.FromModels([]);
        }
    }
    catch (Exception e)
    {
        // Pass the exception itself so that the stack trace is kept, and use a constant message template.
        LOGGER.LogError(e, "Failed to load transcription models from self-hosted provider: {ErrorMessage}", e.Message);
        return ModelLoadResult.Failure(ModelLoadFailureReason.UNKNOWN, e.Message);
    }
}
2024-07-03 18:31:04 +00:00
#endregion
2024-12-03 14:24:40 +00:00
2026-04-14 11:39:11 +00:00
/// <summary>
/// Sends a GET request for the relative path "models" through this provider's HTTP client
/// and converts the answer into a model load result.
/// </summary>
/// <param name="storeType">The secret store category used to resolve the API key.</param>
/// <param name="ignorePhrases">A model is dropped when its id contains any of these phrases.</param>
/// <param name="filterPhrases">A model is kept only when its id contains every one of these phrases.</param>
/// <param name="token">The cancellation token for sending the request and reading the response.</param>
/// <param name="apiKeyProvisional">An optional provisional API key, passed on to the secret lookup.</param>
/// <returns>
/// The matching models on success; otherwise, a failed result that describes the HTTP error or the timeout.
/// </returns>
private async Task<ModelLoadResult> LoadModels(SecretStoreType storeType, string[] ignorePhrases, string[] filterPhrases, CancellationToken token, string? apiKeyProvisional = null)
{
    var bearerToken = await this.GetModelLoadingSecretKey(storeType, apiKeyProvisional, true);

    try
    {
        using var request = new HttpRequestMessage(HttpMethod.Get, "models");

        // The Authorization header is only attached when a key is available.
        if (bearerToken is not null)
            request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", bearerToken);

        using var response = await this.HttpClient.SendAsync(request, token);
        if (response.IsSuccessStatusCode)
        {
            var payload = await response.Content.ReadFromJsonAsync<ModelsResponse>(token);

            // Deferred query: drop ignored ids first, then require all filter phrases.
            var matchingModels =
                from candidate in payload.Data
                where !ignorePhrases.Any(phrase => candidate.Id.Contains(phrase, StringComparison.InvariantCulture))
                      && filterPhrases.All(phrase => candidate.Id.Contains(phrase, StringComparison.InvariantCulture))
                select new Provider.Model(candidate.Id, null);

            return SuccessfulModelLoadResult(matchingModels);
        }

        // Non-success status: log the details and report them back to the caller.
        var errorBody = await response.Content.ReadAsStringAsync(token);
        LOGGER.LogError("Model loading request failed with status code {ResponseStatusCode} (message = '{ResponseReasonPhrase}', error body = '{ErrorBody}').", response.StatusCode, response.ReasonPhrase, errorBody);

        var failureReason = this.GetModelLoadFailureReason(response, errorBody);
        var failureDetails = $"Status={(int)response.StatusCode} {response.ReasonPhrase}; Body='{errorBody}'";
        return FailedModelLoadResult(failureReason, failureDetails);
    }
    catch (Exception e) when (this.IsTimeoutException(e, token))
    {
        // Only timeouts are handled here; any other exception propagates to the caller.
        await this.SendTimeoutError("loading the available models");
        LOGGER.LogError(e, "Timed out while loading models from self-hosted provider '{ProviderInstanceName}'.", this.InstanceName);
        return FailedModelLoadResult(ModelLoadFailureReason.PROVIDER_UNAVAILABLE, e.Message);
    }
}
2026-04-14 11:39:11 +00:00
}