mirror of
https://github.com/MindWorkAI/AI-Studio.git
synced 2026-03-29 13:51:37 +00:00
Merge 40dc6fde99 into 1c52d6f199
This commit is contained in:
commit
3ae0930419
@ -1618,6 +1618,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3403290862"] = "The selec
|
||||
-- Select a provider first
|
||||
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3654197869"] = "Select a provider first"
|
||||
|
||||
-- Estimated amount of tokens:
|
||||
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T377990776"] = "Estimated amount of tokens:"
|
||||
|
||||
-- Start new chat in workspace '{0}'
|
||||
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3928697643"] = "Start new chat in workspace '{0}'"
|
||||
|
||||
@ -5095,6 +5098,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1019424746"] = "Startup log file
|
||||
-- Browse AI Studio's source code on GitHub — we welcome your contributions.
|
||||
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1107156991"] = "Browse AI Studio's source code on GitHub — we welcome your contributions."
|
||||
|
||||
-- The Tokenizer library serves as the base framework for integrating the DeepSeek tokenizer.
|
||||
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1132433749"] = "The Tokenizer library serves as the base framework for integrating the DeepSeek tokenizer."
|
||||
|
||||
-- ID mismatch: the plugin ID differs from the enterprise configuration ID.
|
||||
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1137744461"] = "ID mismatch: the plugin ID differs from the enterprise configuration ID."
|
||||
|
||||
@ -5329,6 +5335,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T566998575"] = "This is a library
|
||||
-- Used .NET SDK
|
||||
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T585329785"] = "Used .NET SDK"
|
||||
|
||||
-- We use the DeepSeek Tokenizer to estimate the number of tokens an input will generate.
|
||||
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T591393704"] = "We use the DeepSeek Tokenizer to estimate the number of tokens an input will generate."
|
||||
|
||||
-- This library is used to manage sidecar processes and to ensure that stale or zombie sidecars are detected and terminated.
|
||||
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T633932150"] = "This library is used to manage sidecar processes and to ensure that stale or zombie sidecars are detected and terminated."
|
||||
|
||||
|
||||
@ -34,7 +34,7 @@
|
||||
</ChildContent>
|
||||
<FooterContent>
|
||||
<MudElement Style="flex: 0 0 auto;">
|
||||
<MudTextField
|
||||
<UserPromptComponent
|
||||
T="string"
|
||||
@ref="@this.inputField"
|
||||
@bind-Text="@this.userInput"
|
||||
@ -50,8 +50,11 @@
|
||||
Disabled="@this.IsInputForbidden()"
|
||||
Immediate="@true"
|
||||
OnKeyUp="@this.InputKeyEvent"
|
||||
WhenTextChangedAsync="@(_ =>this.CalculateTokenCount())"
|
||||
UserAttributes="@USER_INPUT_ATTRIBUTES"
|
||||
Class="@this.UserInputClass"
|
||||
DebounceTime="TimeSpan.FromSeconds(1)"
|
||||
HelperText="@this.TokenCountMessage"
|
||||
Style="@this.UserInputStyle"/>
|
||||
</MudElement>
|
||||
<MudToolBar WrapContent="true" Gutters="@false" Class="border border-solid rounded" Style="border-color: lightgrey;">
|
||||
@ -124,7 +127,6 @@
|
||||
<MudIconButton Icon="@Icons.Material.Filled.Error" Color="Color.Error"/>
|
||||
</MudTooltip>
|
||||
}
|
||||
<MudIconButton />
|
||||
</MudToolBar>
|
||||
</FooterContent>
|
||||
</InnerScrolling>
|
||||
@ -3,6 +3,7 @@ using AIStudio.Dialogs;
|
||||
using AIStudio.Provider;
|
||||
using AIStudio.Settings;
|
||||
using AIStudio.Settings.DataModel;
|
||||
using AIStudio.Tools.Services;
|
||||
|
||||
using Microsoft.AspNetCore.Components;
|
||||
using Microsoft.AspNetCore.Components.Web;
|
||||
@ -37,6 +38,9 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable
|
||||
[Inject]
|
||||
private IDialogService DialogService { get; init; } = null!;
|
||||
|
||||
[Inject]
|
||||
private RustService RustService { get; init; } = null!;
|
||||
|
||||
private const Placement TOOLBAR_TOOLTIP_PLACEMENT = Placement.Top;
|
||||
private static readonly Dictionary<string, object?> USER_INPUT_ATTRIBUTES = new();
|
||||
|
||||
@ -59,10 +63,12 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable
|
||||
private Guid currentChatThreadId = Guid.Empty;
|
||||
private CancellationTokenSource? cancellationTokenSource;
|
||||
private HashSet<FileAttachment> chatDocumentPaths = [];
|
||||
private string tokenCount = "0";
|
||||
private string TokenCountMessage => $"{this.T("Estimated amount of tokens:")} {this.tokenCount}";
|
||||
|
||||
// Unfortunately, we need the input field reference to blur the focus away. Without
|
||||
// this, we cannot clear the input field.
|
||||
private MudTextField<string> inputField = null!;
|
||||
private UserPromptComponent<string> inputField = null!;
|
||||
|
||||
#region Overrides of ComponentBase
|
||||
|
||||
@ -449,6 +455,9 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable
|
||||
// Was a modifier key pressed as well?
|
||||
var isModifier = keyEvent.AltKey || keyEvent.CtrlKey || keyEvent.MetaKey || keyEvent.ShiftKey;
|
||||
|
||||
if (isEnter)
|
||||
await this.CalculateTokenCount();
|
||||
|
||||
// Depending on the user's settings, might react to shortcuts:
|
||||
switch (this.SettingsManager.ConfigurationData.Chat.ShortcutSendBehavior)
|
||||
{
|
||||
@ -568,6 +577,7 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable
|
||||
this.chatDocumentPaths.Clear();
|
||||
|
||||
await this.inputField.BlurAsync();
|
||||
this.tokenCount = "0";
|
||||
|
||||
// Enable the stream state for the chat component:
|
||||
this.isStreaming = true;
|
||||
@ -950,6 +960,20 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
|
||||
/// <summary>
/// Asks the Rust runtime for the token count of the current user input and
/// updates the helper-text message accordingly.
/// </summary>
/// <remarks>
/// Called from the debounced text-change callback, i.e. possibly outside a
/// normal render cycle — therefore the UI must be refreshed explicitly.
/// </remarks>
private async Task CalculateTokenCount()
{
    if (this.inputField.Value is null)
    {
        this.tokenCount = "0";

        // BUG FIX: without an explicit re-render, the helper text kept showing
        // the previous count after the input was cleared:
        this.StateHasChanged();
        return;
    }

    var response = await this.RustService.GetTokenCount(this.inputField.Value);
    if (response is null)
        return;

    this.tokenCount = response.TokenCount.ToString();
    this.StateHasChanged();
}
|
||||
|
||||
#region Overrides of MSGComponentBase
|
||||
|
||||
protected override async Task ProcessIncomingMessage<T>(ComponentBase? sendingComponent, Event triggeredEvent, T? data) where T : default
|
||||
|
||||
68
app/MindWork AI Studio/Components/UserPromptComponent.cs
Normal file
68
app/MindWork AI Studio/Components/UserPromptComponent.cs
Normal file
@ -0,0 +1,68 @@
|
||||
using Microsoft.AspNetCore.Components;

using Timer = System.Timers.Timer;

namespace AIStudio.Components;

/// <summary>
/// A multi-line text input based on <see cref="MudTextField{T}"/> that defers
/// its change notifications until the user has paused typing for
/// <see cref="DebounceTime"/>. All base parameters remain available and can be
/// overridden by callers as usual.
/// </summary>
public class UserPromptComponent<T> : MudTextField<T>
{
    /// <summary>
    /// The idle period that must pass before the change callbacks are raised.
    /// </summary>
    [Parameter]
    public TimeSpan DebounceTime { get; set; } = TimeSpan.FromMilliseconds(800);

    /// <summary>
    /// Awaited callback which receives the debounced text.
    /// </summary>
    [Parameter]
    public Func<string, Task> WhenTextChangedAsync { get; set; } = _ => Task.CompletedTask;

    // NOTE(review): this timer is never disposed — consider hooking it into the
    // component's teardown so it cannot fire after disposal; verify first how
    // the MudBlazor base class exposes its dispose path.
    private readonly Timer timer = new();

    private string currentText = string.Empty;
    private string lastSeenParameterText = string.Empty;
    private string lastRaisedText = string.Empty;
    private bool ready;

    protected override async Task OnInitializedAsync()
    {
        var initial = this.Text ?? string.Empty;
        this.currentText = initial;
        this.lastSeenParameterText = initial;
        this.lastRaisedText = initial;

        this.timer.AutoReset = false;
        this.timer.Interval = this.DebounceTime.TotalMilliseconds;
        this.timer.Elapsed += (_, _) =>
        {
            this.timer.Stop();

            // Nothing changed since the last notification? Then stay silent:
            if (this.lastRaisedText == this.currentText)
                return;

            this.lastRaisedText = this.currentText;

            // The Elapsed event runs on a timer thread; hop onto the renderer's
            // synchronization context before invoking the callbacks:
            this.InvokeAsync(async () => await this.TextChanged.InvokeAsync(this.currentText));
            this.InvokeAsync(async () => await this.WhenTextChangedAsync(this.currentText));
        };

        this.ready = true;
        await base.OnInitializedAsync();
    }

    protected override async Task OnParametersSetAsync()
    {
        if (!this.ready)
            return;

        // Pick up a changed debounce interval; the small tolerance avoids
        // needless timer reconfiguration from floating-point jitter:
        var wantedInterval = this.DebounceTime.TotalMilliseconds;
        if (Math.Abs(this.timer.Interval - wantedInterval) > 1)
            this.timer.Interval = wantedInterval;

        // Adopt the parent's text only when it actually differs from what we
        // saw last — otherwise we would overwrite the user's typing:
        if (this.Text != this.lastSeenParameterText)
        {
            this.currentText = this.Text ?? string.Empty;
            this.lastSeenParameterText = this.currentText;
        }

        // Restart the debounce window:
        this.timer.Stop();
        this.timer.Start();

        await base.OnParametersSetAsync();
    }
}
|
||||
@ -291,6 +291,8 @@
|
||||
<ThirdPartyComponent Name="sysinfo" Developer="Guillaume Gomez & Open Source Community" LicenseName="MIT" LicenseUrl="https://github.com/GuillaumeGomez/sysinfo/blob/main/LICENSE" RepositoryUrl="https://github.com/GuillaumeGomez/sysinfo" UseCase="@T("This library is used to manage sidecar processes and to ensure that stale or zombie sidecars are detected and terminated.")"/>
|
||||
<ThirdPartyComponent Name="tempfile" Developer="Steven Allen, Ashley Mannix & Open Source Community" LicenseName="MIT" LicenseUrl="https://github.com/Stebalien/tempfile/blob/master/LICENSE-MIT" RepositoryUrl="https://github.com/Stebalien/tempfile" UseCase="@T("This library is used to create temporary folders for saving the certificate and private key for communication with Qdrant.")"/>
|
||||
<ThirdPartyComponent Name="Lua-CSharp" Developer="Yusuke Nakada & Open Source Community" LicenseName="MIT" LicenseUrl="https://github.com/nuskey8/Lua-CSharp/blob/main/LICENSE" RepositoryUrl="https://github.com/nuskey8/Lua-CSharp" UseCase="@T("We use Lua as the language for plugins. Lua-CSharp lets Lua scripts communicate with AI Studio and vice versa. Thank you, Yusuke Nakada, for this great library.")" />
|
||||
<ThirdPartyComponent Name="DeepSeek-V3.2 Tokenizer" Developer="DeepSeek-AI" LicenseName="MIT" LicenseUrl="https://huggingface.co/datasets/choosealicense/licenses/blob/main/markdown/mit.md" RepositoryUrl="https://huggingface.co/deepseek-ai/DeepSeek-V3.2/tree/main" UseCase="@T("We use the DeepSeek Tokenizer to estimate the number of tokens an input will generate.")" />
|
||||
<ThirdPartyComponent Name="Tokenizer" Developer="Anthony Moi, Nicolas Patry, Pierric Cistac, Arthur Zucker & Open Source Community" LicenseName="Apache-2.0" LicenseUrl="https://github.com/huggingface/tokenizers/blob/main/LICENSE" RepositoryUrl="https://github.com/huggingface/tokenizers" UseCase="@T("The Tokenizer library serves as the base framework for integrating the DeepSeek tokenizer.")" />
|
||||
<ThirdPartyComponent Name="HtmlAgilityPack" Developer="ZZZ Projects & Open Source Community" LicenseName="MIT" LicenseUrl="https://github.com/zzzprojects/html-agility-pack/blob/master/LICENSE" RepositoryUrl="https://github.com/zzzprojects/html-agility-pack" UseCase="@T("We use the HtmlAgilityPack to extract content from the web. This is necessary, e.g., when you provide a URL as input for an assistant.")"/>
|
||||
<ThirdPartyComponent Name="ReverseMarkdown" Developer="Babu Annamalai & Open Source Community" LicenseName="MIT" LicenseUrl="https://github.com/mysticmind/reversemarkdown-net/blob/master/LICENSE" RepositoryUrl="https://github.com/mysticmind/reversemarkdown-net" UseCase="@T("This library is used to convert HTML to Markdown. This is necessary, e.g., when you provide a URL as input for an assistant.")"/>
|
||||
<ThirdPartyComponent Name="wikEd diff" Developer="Cacycle & Open Source Community" LicenseName="None (public domain)" LicenseUrl="https://en.wikipedia.org/wiki/User:Cacycle/diff#License" RepositoryUrl="https://en.wikipedia.org/wiki/User:Cacycle/diff" UseCase="@T("This library is used to display the differences between two texts. This is necessary, e.g., for the grammar and spelling assistant.")"/>
|
||||
|
||||
@ -1620,6 +1620,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3403290862"] = "Der ausge
|
||||
-- Select a provider first
|
||||
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3654197869"] = "Wähle zuerst einen Anbieter aus"
|
||||
|
||||
-- Estimated amount of tokens:
|
||||
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T377990776"] = "Geschätzte Anzahl an Tokens:"
|
||||
|
||||
-- Start new chat in workspace "{0}"
|
||||
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3928697643"] = "Neuen Chat im Arbeitsbereich \"{0}\" starten"
|
||||
|
||||
@ -5097,6 +5100,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1019424746"] = "Startprotokollda
|
||||
-- Browse AI Studio's source code on GitHub — we welcome your contributions.
|
||||
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1107156991"] = "Sehen Sie sich den Quellcode von AI Studio auf GitHub an – wir freuen uns über Ihre Beiträge."
|
||||
|
||||
-- The Tokenizer library serves as the base framework for integrating the DeepSeek tokenizer.
|
||||
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1132433749"] = "Die Tokenizer‑Bibliothek dient als Basis‑Framework für die Integration des DeepSeek‑Tokenizers."
|
||||
|
||||
-- ID mismatch: the plugin ID differs from the enterprise configuration ID.
|
||||
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1137744461"] = "ID-Konflikt: Die Plugin-ID stimmt nicht mit der ID der Unternehmenskonfiguration überein."
|
||||
|
||||
@ -5331,6 +5337,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T566998575"] = "Dies ist eine Bib
|
||||
-- Used .NET SDK
|
||||
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T585329785"] = "Verwendetes .NET SDK"
|
||||
|
||||
-- We use the DeepSeek Tokenizer to estimate the number of tokens an input will generate.
|
||||
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T591393704"] = "Wir verwenden den DeepSeek‑Tokenizer, um die Token‑Anzahl einer Eingabe zu schätzen."
|
||||
|
||||
-- This library is used to manage sidecar processes and to ensure that stale or zombie sidecars are detected and terminated.
|
||||
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T633932150"] = "Diese Bibliothek wird verwendet, um Sidecar-Prozesse zu verwalten und sicherzustellen, dass veraltete oder Zombie-Sidecars erkannt und beendet werden."
|
||||
|
||||
|
||||
@ -1620,6 +1620,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3403290862"] = "The selec
|
||||
-- Select a provider first
|
||||
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3654197869"] = "Select a provider first"
|
||||
|
||||
-- Estimated amount of tokens:
|
||||
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T377990776"] = "Estimated amount of tokens:"
|
||||
|
||||
-- Start new chat in workspace "{0}"
|
||||
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3928697643"] = "Start new chat in workspace \"{0}\""
|
||||
|
||||
@ -5097,6 +5100,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1019424746"] = "Startup log file
|
||||
-- Browse AI Studio's source code on GitHub — we welcome your contributions.
|
||||
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1107156991"] = "Browse AI Studio's source code on GitHub — we welcome your contributions."
|
||||
|
||||
-- The Tokenizer library serves as the base framework for integrating the DeepSeek tokenizer.
|
||||
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1132433749"] = "The Tokenizer library serves as the base framework for integrating the DeepSeek tokenizer."
|
||||
|
||||
-- ID mismatch: the plugin ID differs from the enterprise configuration ID.
|
||||
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1137744461"] = "ID mismatch: the plugin ID differs from the enterprise configuration ID."
|
||||
|
||||
@ -5331,6 +5337,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T566998575"] = "This is a library
|
||||
-- Used .NET SDK
|
||||
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T585329785"] = "Used .NET SDK"
|
||||
|
||||
-- We use the DeepSeek Tokenizer to estimate the number of tokens an input will generate.
|
||||
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T591393704"] = "We use the DeepSeek Tokenizer to estimate the number of tokens an input will generate."
|
||||
|
||||
-- This library is used to manage sidecar processes and to ensure that stale or zombie sidecars are detected and terminated.
|
||||
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T633932150"] = "This library is used to manage sidecar processes and to ensure that stale or zombie sidecars are detected and terminated."
|
||||
|
||||
|
||||
6
app/MindWork AI Studio/Tools/Rust/TokenCountInfo.cs
Normal file
6
app/MindWork AI Studio/Tools/Rust/TokenCountInfo.cs
Normal file
@ -0,0 +1,6 @@
|
||||
namespace AIStudio.Tools.Rust;

/// <summary>
/// Response DTO of the Rust runtime's tokenizer endpoint: the number of
/// tokens the submitted text is estimated to produce.
/// </summary>
public sealed class TokenCountInfo
{
    // The estimated number of tokens for the submitted text.
    public int TokenCount { get; set; }
}
|
||||
@ -0,0 +1,27 @@
|
||||
using AIStudio.Tools.Rust;

namespace AIStudio.Tools.Services;

public sealed partial class RustService
{
    /// <summary>
    /// Asks the Rust runtime to count the tokens of the given text.
    /// </summary>
    /// <param name="text">The text to tokenize.</param>
    /// <returns>
    /// The token count info, or null when the request failed or timed out
    /// (after 5 seconds).
    /// </returns>
    public async Task<TokenCountInfo?> GetTokenCount(string text)
    {
        try
        {
            // BUG FIX: the CancellationTokenSource was never disposed, leaking
            // its internal timer on every call; `using` releases it.
            using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(5));
            var payload = new { text };
            var response = await this.http.PostAsJsonAsync("/system/tokenizer/count", payload, this.jsonRustSerializerOptions, cts.Token);
            response.EnsureSuccessStatusCode();
            return await response.Content.ReadFromJsonAsync<TokenCountInfo>(this.jsonRustSerializerOptions, cancellationToken: cts.Token);
        }
        catch (Exception e)
        {
            // Fall back to the console when the logger is not yet available:
            if(this.logger is not null)
                this.logger.LogError(e, "Error while getting token count from Rust service.");
            else
                Console.WriteLine($"Error while getting token count from Rust service: '{e}'.");

            return null;
        }
    }
}
|
||||
@ -42,6 +42,7 @@ pptx-to-md = "0.4.0"
|
||||
tempfile = "3.8"
|
||||
strum_macros = "0.27"
|
||||
sysinfo = "0.38.0"
|
||||
tokenizers = "0.22.2"
|
||||
|
||||
# Fixes security vulnerability downstream, where the upstream is not fixed yet:
|
||||
time = "0.3.47" # -> Rocket
|
||||
|
||||
@ -18,3 +18,4 @@ pub mod certificate_factory;
|
||||
pub mod runtime_api_token;
|
||||
pub mod stale_process_cleanup;
|
||||
mod sidecar_types;
|
||||
pub mod tokenizer;
|
||||
@ -11,7 +11,7 @@ use mindwork_ai_studio::environment::is_dev;
|
||||
use mindwork_ai_studio::log::init_logging;
|
||||
use mindwork_ai_studio::metadata::MetaData;
|
||||
use mindwork_ai_studio::runtime_api::start_runtime_api;
|
||||
|
||||
use mindwork_ai_studio::tokenizer::{init_tokenizer};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
@ -43,6 +43,10 @@ async fn main() {
|
||||
info!("Running in production mode.");
|
||||
}
|
||||
|
||||
if let Err(e) = init_tokenizer() {
|
||||
warn!(Source = "Tokenizer"; "Error during the initialisation of the tokenizer: {}", e);
|
||||
}
|
||||
|
||||
generate_runtime_certificate();
|
||||
start_runtime_api();
|
||||
|
||||
|
||||
@ -89,6 +89,7 @@ pub fn start_runtime_api() {
|
||||
crate::file_data::extract_data,
|
||||
crate::log::get_log_paths,
|
||||
crate::log::log_event,
|
||||
crate::tokenizer::tokenizer_count,
|
||||
crate::app_window::register_shortcut,
|
||||
crate::app_window::validate_shortcut,
|
||||
crate::app_window::suspend_shortcuts,
|
||||
|
||||
54
runtime/src/tokenizer.rs
Normal file
54
runtime/src/tokenizer.rs
Normal file
@ -0,0 +1,54 @@
|
||||
use std::fs;
|
||||
use std::path::{PathBuf};
|
||||
use std::sync::OnceLock;
|
||||
use rocket::{post};
|
||||
use rocket::serde::json::Json;
|
||||
use rocket::serde::Serialize;
|
||||
use serde::Deserialize;
|
||||
use tokenizers::Error;
|
||||
use tokenizers::tokenizer::Tokenizer;
|
||||
use crate::api_token::APIToken;
|
||||
|
||||
static TOKENIZER: OnceLock<Tokenizer> = OnceLock::new();
|
||||
|
||||
static TEXT: &str = "";
|
||||
|
||||
pub fn init_tokenizer() -> Result<(), Error>{
|
||||
let mut target_dir = PathBuf::from("target");
|
||||
target_dir.push("tokenizers");
|
||||
fs::create_dir_all(&target_dir)?;
|
||||
|
||||
let mut local_tokenizer_path = target_dir.clone();
|
||||
local_tokenizer_path.push("tokenizer.json");
|
||||
|
||||
TOKENIZER.set(Tokenizer::from_file(local_tokenizer_path)?).expect("Could not set the tokenizer.");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_token_count(mut text: &str) -> usize {
|
||||
if text.is_empty() {
|
||||
text = TEXT;
|
||||
}
|
||||
match TOKENIZER.get().unwrap().encode(text, true) {
|
||||
Ok(encoding) => encoding.len(),
|
||||
Err(_) => 0,
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct SetTokenText {
|
||||
pub text: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct GetTokenCount{
|
||||
token_count: usize,
|
||||
}
|
||||
|
||||
|
||||
#[post("/system/tokenizer/count", data = "<req>")]
|
||||
pub fn tokenizer_count(_token: APIToken, req: Json<SetTokenText>) -> Json<GetTokenCount> {
|
||||
Json(GetTokenCount {
|
||||
token_count: get_token_count(&req.text),
|
||||
})
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user