2024-10-07 11:26:25 +00:00
using System.Net.Http.Headers ;
2024-07-03 18:31:04 +00:00
using System.Runtime.CompilerServices ;
2026-06-11 13:46:17 +00:00
using System.Text.Json ;
2024-07-03 18:31:04 +00:00
using AIStudio.Chat ;
using AIStudio.Provider.OpenAI ;
2025-01-02 13:50:54 +00:00
using AIStudio.Settings ;
2026-01-09 11:45:21 +00:00
using AIStudio.Tools.PluginSystem ;
2024-07-03 18:31:04 +00:00
namespace AIStudio.Provider.SelfHosted ;
2026-05-31 16:46:54 +00:00
public sealed class ProviderSelfHosted ( Host host , string hostname ) : BaseProvider ( LLMProviders . SELF_HOSTED , new Uri ( $"{hostname}{host.BaseURL()}" ) , ExternalHttpTrustPolicy . ALLOW_CUSTOM_ROOTS_WHEN_HOST_WHITELISTED , LOGGER )
2024-07-03 18:31:04 +00:00
{
2025-09-03 19:25:17 +00:00
// Logger shared by all instances of this provider; created once from the application's logger factory.
// It is also handed to the base class through the primary constructor's base call.
private static readonly ILogger < ProviderSelfHosted > LOGGER = Program . LOGGER_FACTORY . CreateLogger < ProviderSelfHosted > ( ) ;
2026-01-09 11:45:21 +00:00
// Text lookup helper: forwards the given fallback text to the I18N service together with
// this type's namespace and type name.
private static string TB ( string fallbackEN ) = > I18N . I . T ( fallbackEN , typeof ( ProviderSelfHosted ) . Namespace , nameof ( ProviderSelfHosted ) ) ;
2025-09-03 19:25:17 +00:00
2024-07-03 18:31:04 +00:00
#region Implementation of IProvider
2026-04-16 09:24:22 +00:00
// The id is derived from the SELF_HOSTED provider kind only, so every instance of this class reports the same id.
/// <inheritdoc />
2024-12-03 14:24:40 +00:00
public override string Id = > LLMProviders . SELF_HOSTED . ToName ( ) ;
2024-07-03 18:31:04 +00:00
2026-04-16 09:24:22 +00:00
// Starts out as "Self-hosted"; the setter allows the name to be replaced afterwards.
/// <inheritdoc />
2024-12-03 14:24:40 +00:00
public override string InstanceName { get ; set ; } = "Self-hosted" ;
2026-04-16 09:24:22 +00:00
/// <inheritdoc />
public override bool HasModelLoadingCapability => host switch
{
    // These hosts are the ones for which model loading is reported as available:
    Host.OLLAMA or Host.LM_STUDIO or Host.VLLM or Host.LLAMA_CPP => true,

    // Every other host reports no model loading capability:
    _ => false,
};
2024-07-03 18:31:04 +00:00
2024-09-01 18:10:03 +00:00
/// <inheritdoc />
public override async IAsyncEnumerable<ContentStreamChunk> StreamChatCompletion(Provider.Model chatModel, ChatThread chatThread, SettingsManager settingsManager, [EnumeratorCancellation] CancellationToken token = default)
{
    // For llama.cpp, a system placeholder model may be replaced by the single concrete model the server offers:
    var modelForRequest = await this.ResolveChatModelForRequest(chatModel, token);

    var chunks = this.StreamOpenAICompatibleChatCompletion<ChatCompletionAPIRequest, ChatCompletionDeltaStreamLine, ChatCompletionAnnotationStreamLine>(
        "self-hosted provider",
        modelForRequest,
        chatThread,
        settingsManager,
        async (systemPrompt, apiParameters) =>
        {
            // The image format inside the messages depends on the host:
            // - Ollama takes the direct form: { "type": "image_url", "image_url": "data:..." }
            // - All other hosts (LM Studio, vLLM, llama.cpp) take the nested form: { "type": "image_url", "image_url": { "url": "data:..." } }
            var conversation = host is Host.OLLAMA
                ? await chatThread.Blocks.BuildMessagesUsingDirectImageUrlAsync(this.Provider, modelForRequest)
                : await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, modelForRequest);

            return new ChatCompletionAPIRequest
            {
                Model = modelForRequest.Id,

                // The system prompt always comes first, followed by the
                // non-empty user and AI messages of the conversation:
                Messages = [systemPrompt, .. conversation],

                // Only streaming completions are supported at the moment:
                Stream = true,
                AdditionalApiParameters = apiParameters
            };
        },
        isTryingSecret: true,
        requestPath: host.ChatURL(),
        token: token);

    // Pass every chunk through to the caller as it arrives:
    await foreach (var chunk in chunks)
        yield return chunk;
}
#pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously
/// <inheritdoc />
2024-12-03 14:24:40 +00:00
public override async IAsyncEnumerable < ImageURL > StreamImageCompletion ( Provider . Model imageModel , string promptPositive , string promptNegative = FilterOperator . String . Empty , ImageURL referenceImageURL = default , [ EnumeratorCancellation ] CancellationToken token = default )
2024-07-03 18:31:04 +00:00
{
// No images are produced here: the stream ends immediately without yielding any element,
// and none of the parameters are read.
// NOTE(review): the default of promptNegative is `FilterOperator.String.Empty`, not `string.Empty`.
// Presumably an empty string was intended; confirm what that constant evaluates to and whether
// the base declaration uses the same default before changing it.
yield break ;
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
2025-05-11 10:51:35 +00:00
2026-01-11 15:02:28 +00:00
/// <inheritdoc />
2026-05-23 09:25:18 +00:00
public override async Task < TranscriptionResult > TranscribeAudioAsync ( Provider . Model transcriptionModel , string audioFilePath , SettingsManager settingsManager , CancellationToken token = default )
2026-01-11 15:02:28 +00:00
{
2026-06-10 19:01:27 +00:00
// Ask the Rust service for the API key of the transcription secret store (requested with isTrying: true).
var requestedSecret = await Program . RUST_SERVICE . GetAPIKey ( this , SecretStoreType . TRANSCRIPTION_PROVIDER , isTrying : true ) ;
2026-01-11 15:02:28 +00:00
// The actual request is delegated to the shared transcription helper, which also receives the configured host.
// Note: settingsManager is not used by this implementation.
return await this . PerformStandardTranscriptionRequest ( requestedSecret , transcriptionModel , audioFilePath , host , token ) ;
}
2026-02-20 14:32:54 +00:00
/// <inheritdoc />
public override async Task < IReadOnlyList < IReadOnlyList < float > > > EmbedTextAsync ( Provider . Model embeddingModel , SettingsManager settingsManager , CancellationToken token = default , params List < string > texts )
{
2026-06-10 19:01:27 +00:00
// Ask the Rust service for the API key of the embedding secret store (requested with isTrying: true).
var requestedSecret = await Program . RUST_SERVICE . GetAPIKey ( this , SecretStoreType . EMBEDDING_PROVIDER , isTrying : true ) ;
2026-02-22 14:09:51 +00:00
// The actual request is delegated to the shared text embedding helper, which also receives the configured host.
// Note: settingsManager is not used by this implementation.
return await this . PerformStandardTextEmbeddingRequest ( requestedSecret , embeddingModel , host , token : token , texts : texts ) ;
2026-02-20 14:32:54 +00:00
}
2026-04-14 11:39:11 +00:00
/// <inheritdoc />
public override async Task<ModelLoadResult> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)
{
    try
    {
        // For text models, every model whose id contains "embed" is ignored.
        return host switch
        {
            // llama.cpp has its own loader, which can fall back to the legacy system model:
            Host.LLAMA_CPP => await this.LoadLlamaCppTextModels(["embed"], [], token, apiKeyProvisional),

            Host.LM_STUDIO or Host.OLLAMA or Host.VLLM => await this.LoadModels(SecretStoreType.LLM_PROVIDER, ["embed"], [], token, apiKeyProvisional),

            // Any other host yields an empty, successful result:
            _ => ModelLoadResult.FromModels([]),
        };
    }
    catch (Exception e)
    {
        // Pass the exception object along so that the stack trace ends up in the log as well:
        LOGGER.LogError(e, "Failed to load text models from self-hosted provider: {ErrorMessage}", e.Message);
        return ModelLoadResult.Failure(ModelLoadFailureReason.UNKNOWN, e.Message);
    }
}
/// <inheritdoc />
public override Task<ModelLoadResult> GetImageModels(string? apiKeyProvisional = null, CancellationToken token = default)
    // No request is made; the result is always an empty, successful model list.
    => Task.FromResult(ModelLoadResult.FromModels([]));
2024-12-03 14:24:40 +00:00
2026-04-14 11:39:11 +00:00
/// <inheritdoc />
public override async Task<ModelLoadResult> GetEmbeddingModels(string? apiKeyProvisional = null, CancellationToken token = default)
{
    try
    {
        return host switch
        {
            // For embedding models, only models whose id contains "embed" are kept:
            Host.LM_STUDIO or Host.OLLAMA or Host.VLLM => await this.LoadModels(SecretStoreType.EMBEDDING_PROVIDER, [], ["embed"], token, apiKeyProvisional),

            // Any other host yields an empty, successful result:
            _ => ModelLoadResult.FromModels([]),
        };
    }
    catch (Exception e)
    {
        // The message names embedding models (it previously said "text models"), and the
        // exception object is passed along so that the stack trace ends up in the log:
        LOGGER.LogError(e, "Failed to load embedding models from self-hosted provider: {ErrorMessage}", e.Message);
        return ModelLoadResult.Failure(ModelLoadFailureReason.UNKNOWN, e.Message);
    }
}
2025-05-11 10:51:35 +00:00
2026-01-09 11:45:21 +00:00
/// <inheritdoc />
public override async Task<ModelLoadResult> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default)
{
    try
    {
        switch (host)
        {
            case Host.WHISPER_CPP:
                // No request is made for whisper.cpp; a single fixed entry is returned instead:
                return ModelLoadResult.FromModels(
                [
                    new Provider.Model("loaded-model", TB("Model as configured by whisper.cpp")),
                ]);

            case Host.OLLAMA:
            case Host.VLLM:
                // No ignore or filter phrases: every model the server reports is offered.
                return await this.LoadModels(SecretStoreType.TRANSCRIPTION_PROVIDER, [], [], token, apiKeyProvisional);

            default:
                return ModelLoadResult.FromModels([]);
        }
    }
    catch (Exception e)
    {
        // Pass the exception object along so that the stack trace ends up in the log as well:
        LOGGER.LogError(e, "Failed to load transcription models from self-hosted provider: {ErrorMessage}", e.Message);
        return ModelLoadResult.Failure(ModelLoadFailureReason.UNKNOWN, e.Message);
    }
}
2024-07-03 18:31:04 +00:00
#endregion
2024-12-03 14:24:40 +00:00
2026-04-14 11:39:11 +00:00
/// <summary>
/// Requests the "models" endpoint of the configured host and converts the reported
/// model ids into provider models, applying the given ignore and filter phrases.
/// </summary>
/// <param name="storeType">The secret store used for looking up the API key.</param>
/// <param name="ignorePhrases">A model is dropped when its id contains any of these phrases.</param>
/// <param name="filterPhrases">A model is kept only when its id contains all of these phrases.</param>
/// <param name="token">The token for cancelling the request.</param>
/// <param name="apiKeyProvisional">An optional API key that is passed on to the secret key lookup.</param>
/// <returns>
/// The matching models on success. A failure result when the server answers with a
/// non-success status code or when the request times out. Other exceptions are not
/// caught here and reach the caller.
/// </returns>
private async Task<ModelLoadResult> LoadModels(SecretStoreType storeType, string[] ignorePhrases, string[] filterPhrases, CancellationToken token, string? apiKeyProvisional = null)
{
    var secretKey = await this.GetModelLoadingSecretKey(storeType, apiKeyProvisional, true);
    try
    {
        using var request = new HttpRequestMessage(HttpMethod.Get, "models");

        // Authenticate only when a usable key exists. An empty or whitespace-only key
        // would otherwise produce a "Bearer" header without any token.
        if (!string.IsNullOrWhiteSpace(secretKey))
            request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", secretKey);

        using var response = await this.HttpClient.SendAsync(request, token);
        if (!response.IsSuccessStatusCode)
        {
            var responseBody = await response.Content.ReadAsStringAsync(token);
            LOGGER.LogError("Model loading request failed with status code {ResponseStatusCode} (message = '{ResponseReasonPhrase}', error body = '{ErrorBody}').", response.StatusCode, response.ReasonPhrase, responseBody);
            return FailedModelLoadResult(this.GetModelLoadFailureReason(response, responseBody), $"Status={(int)response.StatusCode} {response.ReasonPhrase}; Body='{responseBody}'");
        }

        var modelsResponse = await response.Content.ReadFromJsonAsync<ModelsResponse>(token);

        // A response without a data array is treated like an empty model list:
        var models = modelsResponse.Data ?? [];

        // NOTE(review): the phrase matching here is case-sensitive, while
        // IsMatchingLlamaCppTextModel ignores case; confirm whether this difference is intended.
        return SuccessfulModelLoadResult(models.
            Where(model => !string.IsNullOrWhiteSpace(model.Id) &&
                           !ignorePhrases.Any(ignorePhrase => model.Id.Contains(ignorePhrase, StringComparison.InvariantCulture)) &&
                           filterPhrases.All(filter => model.Id.Contains(filter, StringComparison.InvariantCulture)))
            .Select(n => new Provider.Model(n.Id, null)));
    }
    catch (Exception e) when (this.IsTimeoutException(e, token))
    {
        await this.SendTimeoutError("loading the available models");
        LOGGER.LogError(e, "Timed out while loading models from self-hosted provider '{ProviderInstanceName}'.", this.InstanceName);
        return FailedModelLoadResult(ModelLoadFailureReason.PROVIDER_UNAVAILABLE, e.Message);
    }
}
2026-06-11 13:46:17 +00:00
/// <summary>
/// Determines the model that is actually sent with a chat request. Only llama.cpp hosts
/// configured with the system placeholder model are affected; in every other case the
/// given model is returned unchanged.
/// </summary>
/// <param name="chatModel">The model as configured for the chat.</param>
/// <param name="token">The token for cancelling the model lookup.</param>
/// <returns>
/// The single concrete model offered by the server when there is exactly one;
/// otherwise the given model (also when the model lookup was not successful).
/// </returns>
/// <exception cref="ProviderRequestException">
/// The server offers no usable text model and no system model, or it offers more
/// than one concrete model so that a selection in the provider settings is required.
/// </exception>
private async Task<Provider.Model> ResolveChatModelForRequest(Provider.Model chatModel, CancellationToken token)
{
    var needsResolution = host is Host.LLAMA_CPP && chatModel.IsSystemModel;
    if (!needsResolution)
        return chatModel;

    var loadResult = await this.LoadLlamaCppTextModels(["embed"], [], token);
    if (!loadResult.Success)
        return chatModel;

    // Concrete candidates: everything that is not the system placeholder and has a usable id.
    var candidates = loadResult.Models
        .Where(model => !model.IsSystemModel && !string.IsNullOrWhiteSpace(model.Id))
        .ToList();

    switch (candidates.Count)
    {
        // Nothing usable and no system placeholder to fall back to:
        case 0 when loadResult.Models.All(model => !model.IsSystemModel):
            LOGGER.LogError("The llama.cpp provider '{ProviderInstanceName}' does not offer a usable text model. Please check your provider settings.", this.InstanceName);
            throw new ProviderRequestException(
                ProviderRequestFailureReason.NONE,
                string.Format(
                    TB("The llama.cpp provider '{0}' does not offer a usable text model. Please check your provider settings."),
                    this.InstanceName));

        // Exactly one concrete model: use it in place of the placeholder.
        case 1:
            return candidates[0];

        // Several concrete models: the choice cannot be made automatically.
        case > 1:
            LOGGER.LogError(
                "The llama.cpp provider '{ProviderInstanceName}' offers {ModelCount} models, but the configured model is the legacy system placeholder. The provider settings must be updated to select a specific model.",
                this.InstanceName,
                candidates.Count);
            throw new ProviderRequestException(
                ProviderRequestFailureReason.NONE,
                string.Format(
                    TB("The llama.cpp provider '{0}' offers multiple models. Please open the provider settings and select the model to use."),
                    this.InstanceName));

        // No concrete model, but the system placeholder is among the results: keep the configured model.
        default:
            return chatModel;
    }
}
/// <summary>
/// Requests the "models" endpoint of a llama.cpp server and returns the text models it reports.
/// Falls back to the legacy result (a list holding only the system model) when the endpoint
/// answers with 404, when the response holds no model with a usable id, or when the response
/// body cannot be parsed as JSON.
/// </summary>
/// <param name="ignorePhrases">Passed to <c>IsMatchingLlamaCppTextModel</c>: ids containing any of these phrases are dropped.</param>
/// <param name="filterPhrases">Passed to <c>IsMatchingLlamaCppTextModel</c>: ids must contain all of these phrases.</param>
/// <param name="token">The token for cancelling the request.</param>
/// <param name="apiKeyProvisional">An optional API key that is passed on to the secret key lookup.</param>
/// <returns>
/// The matching models or the legacy result on success; a failure result for other
/// non-success status codes, for timeouts, and for any other exception.
/// </returns>
private async Task < ModelLoadResult > LoadLlamaCppTextModels ( string [ ] ignorePhrases , string [ ] filterPhrases , CancellationToken token , string? apiKeyProvisional = null )
{
var secretKey = await this . GetModelLoadingSecretKey ( SecretStoreType . LLM_PROVIDER , apiKeyProvisional , true ) ;
try
{
using var request = new HttpRequestMessage ( HttpMethod . Get , "models" ) ;
// Authenticate only when a non-blank key is available:
if ( ! string . IsNullOrWhiteSpace ( secretKey ) )
request . Headers . Authorization = new AuthenticationHeaderValue ( "Bearer" , secretKey ) ;
using var response = await this . HttpClient . SendAsync ( request , token ) ;
// The body is read up front because both the error path and the parsing path need it:
var responseBody = await response . Content . ReadAsStringAsync ( token ) ;
if ( ! response . IsSuccessStatusCode )
{
// A 404 is not reported as an error; it leads to the legacy system model instead:
if ( response . StatusCode is System . Net . HttpStatusCode . NotFound )
return LlamaCppLegacyModelResult ( ) ;
LOGGER . LogError ( "llama.cpp model loading request failed with status code {ResponseStatusCode} (message = '{ResponseReasonPhrase}', error body = '{ErrorBody}')." , response . StatusCode , response . ReasonPhrase , responseBody ) ;
return FailedModelLoadResult ( this . GetModelLoadFailureReason ( response , responseBody ) , $"Status={(int)response.StatusCode} {response.ReasonPhrase}; Body='{responseBody}'" ) ;
}
try
{
var modelResponse = JsonSerializer . Deserialize < ModelsResponse > ( responseBody , JSON_SERIALIZER_OPTIONS ) ;
// Entries without a usable id are discarded; a missing data array counts as empty:
var responseModels = modelResponse . Data ?
. Where ( model = > ! string . IsNullOrWhiteSpace ( model . Id ) )
. ToList ( ) ? ? [ ] ;
// The legacy fallback applies only when the server reported no usable model at all.
// If models were reported but none passes the filter below, the result is an empty success.
if ( responseModels . Count is 0 )
return LlamaCppLegacyModelResult ( ) ;
var models = responseModels
. Where ( model = > IsMatchingLlamaCppTextModel ( model , ignorePhrases , filterPhrases ) )
. Select ( model = > new Provider . Model ( model . Id , null ) )
. ToList ( ) ;
return SuccessfulModelLoadResult ( models ) ;
}
catch ( JsonException e )
{
// An unparsable body is logged as a warning only and leads to the legacy system model:
LOGGER . LogWarning ( e , "The llama.cpp model loading response could not be parsed. Falling back to the legacy system-configured model." ) ;
return LlamaCppLegacyModelResult ( ) ;
}
}
catch ( Exception e ) when ( this . IsTimeoutException ( e , token ) )
{
// Timeouts are reported through SendTimeoutError and mapped to PROVIDER_UNAVAILABLE:
await this . SendTimeoutError ( "loading the available models" ) ;
LOGGER . LogError ( e , "Timed out while loading models from llama.cpp provider '{ProviderInstanceName}'." , this . InstanceName ) ;
return FailedModelLoadResult ( ModelLoadFailureReason . PROVIDER_UNAVAILABLE , e . Message ) ;
}
catch ( Exception e )
{
// Everything else is logged and mapped to an UNKNOWN failure; no exception leaves this method from the try block:
LOGGER . LogError ( e , "Failed to load models from llama.cpp provider '{ProviderInstanceName}'." , this . InstanceName ) ;
return FailedModelLoadResult ( ModelLoadFailureReason . UNKNOWN , e . Message ) ;
}
}
/// <summary>
/// Decides whether a model reported by llama.cpp is offered as a text model.
/// </summary>
/// <param name="model">The model entry from the server response.</param>
/// <param name="ignorePhrases">The model is rejected when its id contains any of these phrases (case is ignored).</param>
/// <param name="filterPhrases">The model is rejected unless its id contains all of these phrases (case is ignored).</param>
/// <returns>
/// <c>true</c> when the id is usable, passes both phrase checks, and the model either
/// declares no output modalities or declares "text" among them; otherwise <c>false</c>.
/// </returns>
private static bool IsMatchingLlamaCppTextModel(Model model, string[] ignorePhrases, string[] filterPhrases)
{
    var hasUsableId = !string.IsNullOrWhiteSpace(model.Id);
    if (!hasUsableId)
        return false;

    var isIgnored = ignorePhrases.Any(ignorePhrase => model.Id.Contains(ignorePhrase, StringComparison.InvariantCultureIgnoreCase));
    if (isIgnored)
        return false;

    var passesAllFilters = filterPhrases.All(filter => model.Id.Contains(filter, StringComparison.InvariantCultureIgnoreCase));
    if (!passesAllFilters)
        return false;

    // Output modalities only restrict the result when the model declares at least one;
    // in that case, "text" has to be among them.
    var declaredModalities = model.Architecture?.OutputModalities;
    return declaredModalities is not { Length: > 0 }
           || declaredModalities.Any(modality => string.Equals(modality, "text", StringComparison.OrdinalIgnoreCase));
}
/// <summary>
/// Builds the fallback result for llama.cpp: a model list that holds only the system model.
/// </summary>
private static ModelLoadResult LlamaCppLegacyModelResult()
    => ModelLoadResult.FromModels([AIStudio.Provider.Model.SYSTEM_MODEL]);
2026-04-14 11:39:11 +00:00
}