2024-10-07 11:26:25 +00:00
|
|
|
using System.Net.Http.Headers;
|
2024-07-03 18:31:04 +00:00
|
|
|
using System.Runtime.CompilerServices;
|
|
|
|
|
using System.Text;
|
|
|
|
|
using System.Text.Json;
|
|
|
|
|
|
|
|
|
|
using AIStudio.Chat;
|
|
|
|
|
using AIStudio.Provider.OpenAI;
|
2025-01-02 13:50:54 +00:00
|
|
|
using AIStudio.Settings;
|
2026-01-09 11:45:21 +00:00
|
|
|
using AIStudio.Tools.PluginSystem;
|
2024-07-03 18:31:04 +00:00
|
|
|
|
|
|
|
|
namespace AIStudio.Provider.SelfHosted;
|
|
|
|
|
|
2025-12-30 17:30:32 +00:00
|
|
|
/// <summary>
/// Provider for self-hosted backends (llama.cpp, LM Studio, Ollama, vLLM, whisper.cpp)
/// which are addressed through an OpenAI-style HTTP API.
/// </summary>
/// <param name="host">The kind of self-hosted backend; selects URLs, image message format, and model listing behavior.</param>
/// <param name="hostname">The host name; it is concatenated with <c>host.BaseURL()</c> to form the base URL handed to the base provider.</param>
public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvider(LLMProviders.SELF_HOSTED, $"{hostname}{host.BaseURL()}", LOGGER)
{
    private static readonly ILogger<ProviderSelfHosted> LOGGER = Program.LOGGER_FACTORY.CreateLogger<ProviderSelfHosted>();

    /// <summary>
    /// Looks up a text through the I18N service, scoped by this class's namespace and type name.
    /// </summary>
    /// <param name="fallbackEN">The English fallback text.</param>
    private static string TB(string fallbackEN) => I18N.I.T(fallbackEN, typeof(ProviderSelfHosted).Namespace, nameof(ProviderSelfHosted));

    #region Implementation of IProvider

    /// <inheritdoc />
    public override string Id => LLMProviders.SELF_HOSTED.ToName();

    /// <inheritdoc />
    public override string InstanceName { get; set; } = "Self-hosted";

    /// <inheritdoc />
    public override async IAsyncEnumerable<ContentStreamChunk> StreamChatCompletion(Provider.Model chatModel, ChatThread chatThread, SettingsManager settingsManager, [EnumeratorCancellation] CancellationToken token = default)
    {
        // Get the API key. The key is optional here: the authorization header
        // is only added below when a key could be retrieved.
        var requestedSecret = await RUST_SERVICE.GetAPIKey(this, SecretStoreType.LLM_PROVIDER, isTrying: true);

        // Prepare the system prompt:
        var systemPrompt = new TextMessage
        {
            Role = "system",
            Content = chatThread.PrepareSystemPrompt(settingsManager),
        };

        // Parse the API parameters:
        var apiParameters = this.ParseAdditionalApiParameters();

        // Build the list of messages. The image format depends on the host:
        // - Ollama uses the direct image URL format: { "type": "image_url", "image_url": "data:..." }
        // - LM Studio, vLLM, and llama.cpp use the nested image URL format: { "type": "image_url", "image_url": { "url": "data:..." } }
        var messages = host switch
        {
            Host.OLLAMA => await chatThread.Blocks.BuildMessagesUsingDirectImageUrlAsync(this.Provider, chatModel),
            _ => await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel),
        };

        // Prepare the OpenAI HTTP chat request:
        var providerChatRequest = JsonSerializer.Serialize(new ChatRequest
        {
            Model = chatModel.Id,

            // Build the messages:
            // - First of all the system prompt
            // - Then non-empty user and AI messages
            Messages = [systemPrompt, ..messages],

            // Right now, we only support streaming completions:
            Stream = true,
            AdditionalApiParameters = apiParameters
        }, JSON_SERIALIZER_OPTIONS);

        // Creates a fresh request message on every call; the serialized body
        // and the retrieved secret are captured from the enclosing method.
        async Task<HttpRequestMessage> RequestBuilder()
        {
            // Build the HTTP post request:
            var request = new HttpRequestMessage(HttpMethod.Post, host.ChatURL());

            // Set the authorization header (only when an API key is available):
            if (requestedSecret.Success)
            {
                request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION));
            }

            // Set the content:
            request.Content = new StringContent(providerChatRequest, Encoding.UTF8, "application/json");
            return request;
        }

        await foreach (var content in this.StreamChatCompletionInternal<ChatCompletionDeltaStreamLine, ChatCompletionAnnotationStreamLine>("self-hosted provider", RequestBuilder, token))
        {
            yield return content;
        }
    }

    #pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously
    /// <inheritdoc />
    public override async IAsyncEnumerable<ImageURL> StreamImageCompletion(Provider.Model imageModel, string promptPositive, string promptNegative = FilterOperator.String.Empty, ImageURL referenceImageURL = default, [EnumeratorCancellation] CancellationToken token = default)
    {
        // NOTE(review): the default of promptNegative is FilterOperator.String.Empty
        // rather than string.Empty -- confirm this matches the base declaration and is intended.

        // This provider yields no images; the stream ends immediately.
        yield break;
    }
    #pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously

    /// <inheritdoc />
    public override async Task<string> TranscribeAudioAsync(Provider.Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
    {
        // The settings manager is not used by this implementation.
        var requestedSecret = await RUST_SERVICE.GetAPIKey(this, SecretStoreType.TRANSCRIPTION_PROVIDER, isTrying: true);
        return await this.PerformStandardTranscriptionRequest(requestedSecret, transcriptionModel, audioFilePath, host, token);
    }

    /// <inheritdoc />
    /// <remarks>
    /// Any exception is logged and results in an empty list. Hosts which are not
    /// handled by the switch yield an empty list as well.
    /// </remarks>
    public override async Task<IEnumerable<Provider.Model>> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)
    {
        try
        {
            switch (host)
            {
                case Host.LLAMA_CPP:
                    // Right now, llama.cpp only supports one model.
                    // There is no API to list the model(s).
                    return [ new Provider.Model("as configured by llama.cpp", null) ];

                case Host.LM_STUDIO:
                case Host.OLLAMA:
                case Host.VLLM:
                    // Text models: skip every model whose id contains "embed".
                    return await this.LoadModels(SecretStoreType.LLM_PROVIDER, ["embed"], [], token, apiKeyProvisional);
            }

            return [];
        }
        catch (Exception e)
        {
            LOGGER.LogError(e, "Failed to load text models from self-hosted provider: {Message}", e.Message);
            return [];
        }
    }

    /// <inheritdoc />
    public override Task<IEnumerable<Provider.Model>> GetImageModels(string? apiKeyProvisional = null, CancellationToken token = default)
    {
        // No image models are offered by this provider.
        return Task.FromResult(Enumerable.Empty<Provider.Model>());
    }

    /// <inheritdoc />
    /// <remarks>
    /// Any exception is logged and results in an empty list. Hosts which are not
    /// handled by the switch yield an empty list as well.
    /// </remarks>
    public override async Task<IEnumerable<Provider.Model>> GetEmbeddingModels(string? apiKeyProvisional = null, CancellationToken token = default)
    {
        try
        {
            switch (host)
            {
                case Host.LM_STUDIO:
                case Host.OLLAMA:
                case Host.VLLM:
                    // Embedding models: keep only models whose id contains "embed".
                    return await this.LoadModels(SecretStoreType.EMBEDDING_PROVIDER, [], ["embed"], token, apiKeyProvisional);
            }

            return [];
        }
        catch (Exception e)
        {
            LOGGER.LogError(e, "Failed to load embedding models from self-hosted provider: {Message}", e.Message);
            return [];
        }
    }

    /// <inheritdoc />
    /// <remarks>
    /// Any exception is logged and results in an empty list.
    /// </remarks>
    public override async Task<IEnumerable<Provider.Model>> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default)
    {
        try
        {
            switch (host)
            {
                case Host.WHISPER_CPP:
                    // No request is made for whisper.cpp; a single placeholder entry is returned.
                    return new List<Provider.Model>
                    {
                        new("loaded-model", TB("Model as configured by whisper.cpp")),
                    };

                case Host.OLLAMA:
                case Host.VLLM:
                    // No id-based filtering: every listed model is returned.
                    return await this.LoadModels(SecretStoreType.TRANSCRIPTION_PROVIDER, [], [], token, apiKeyProvisional);

                default:
                    return [];
            }
        }
        catch (Exception e)
        {
            LOGGER.LogError(e, "Failed to load transcription models from self-hosted provider: {Message}", e.Message);
            return [];
        }
    }

    #endregion

    /// <summary>
    /// Requests the relative <c>models</c> endpoint and returns the listed models,
    /// filtered by their ids.
    /// </summary>
    /// <param name="storeType">The secret store type used to look up the API key when no provisional key is given.</param>
    /// <param name="ignorePhrases">A model is dropped when its id contains any of these phrases (case-sensitive).</param>
    /// <param name="filterPhrases">A model is kept only when its id contains all of these phrases (case-sensitive).</param>
    /// <param name="token">The cancellation token for the HTTP request and for reading the response.</param>
    /// <param name="apiKeyProvisional">An optional API key which takes precedence over the stored key.</param>
    /// <returns>The matching models; an empty list when the response status does not indicate success.</returns>
    private async Task<IEnumerable<Provider.Model>> LoadModels(SecretStoreType storeType, string[] ignorePhrases, string[] filterPhrases, CancellationToken token, string? apiKeyProvisional = null)
    {
        // Prefer the provisional key when one is supplied; otherwise, try to
        // read the stored key for the given store type. The result stays null
        // when no key is available.
        var secretKey = apiKeyProvisional switch
        {
            not null => apiKeyProvisional,
            _ => await RUST_SERVICE.GetAPIKey(this, storeType, isTrying: true) switch
            {
                { Success: true } result => await result.Secret.Decrypt(ENCRYPTION),
                _ => null,
            }
        };

        using var modelsRequest = new HttpRequestMessage(HttpMethod.Get, "models");
        if (secretKey is not null)
        {
            // Use the resolved key: apiKeyProvisional is null whenever the key
            // was read from the secret store.
            modelsRequest.Headers.Authorization = new AuthenticationHeaderValue("Bearer", secretKey);
        }

        using var modelsResponse = await this.httpClient.SendAsync(modelsRequest, token);
        if (!modelsResponse.IsSuccessStatusCode)
        {
            return [];
        }

        var parsedResponse = await modelsResponse.Content.ReadFromJsonAsync<ModelsResponse>(token);
        return parsedResponse.Data
            .Where(model => !ignorePhrases.Any(ignorePhrase => model.Id.Contains(ignorePhrase, StringComparison.InvariantCulture)) &&
                            filterPhrases.All(filter => model.Id.Contains(filter, StringComparison.InvariantCulture)))
            .Select(n => new Provider.Model(n.Id, null));
    }
}
|