Feature: Add local LLM vision clients (llama.cpp and Ollama)
Add LlamaCppVisionClient and OllamaVisionClient for local AI inference as alternatives to OpenAI and Claude. Includes text-only prompt support for LLM-assisted receipt matching.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
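Both new clients implement the app's existing IAIVisionClient abstraction, alongside OpenAIVisionClient and ClaudeVisionClient. Neither the interface nor VisionApiResult appears in this diff; the following is a minimal sketch inferred from how the new code uses them (the property names are assumptions; only the two static factory methods are visible in the diff). SendTextPromptAsync is implemented only by LlamaCppVisionClient, so it is presumably a class-level extra rather than an interface member.

    // Minimal sketch, inferred from this diff. The real definitions live
    // elsewhere in MoneyMap; property names here are assumptions.
    using System.Threading.Tasks;

    public interface IAIVisionClient
    {
        Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model);
    }

    public class VisionApiResult
    {
        public bool IsSuccess { get; init; }
        public string? Content { get; init; }
        public string? Error { get; init; }

        public static VisionApiResult Success(string? content) => new() { IsSuccess = true, Content = content };
        public static VisionApiResult Failure(string error) => new() { IsSuccess = false, Error = error };
    }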
@@ -165,6 +165,13 @@
     <div class="mb-2">
         <label for="model" class="form-label small">AI Model</label>
         <select name="model" id="model" class="form-select form-select-sm">
+            <optgroup label="Local (llama.cpp)">
+                <option value="llamacpp:GLM-4.6V-UD-Q4_K_XL-00001-of-00002">GLM-4.6V (Vision)</option>
+            </optgroup>
+            <optgroup label="Local (Ollama)">
+                <option value="ollama:llava">LLaVA (Vision)</option>
+                <option value="ollama:llava:13b">LLaVA 13B (Vision)</option>
+            </optgroup>
             <optgroup label="OpenAI">
                 <option value="gpt-4o-mini" selected>GPT-4o Mini (Fast & Cheap)</option>
                 <option value="gpt-4o">GPT-4o (Smarter)</option>
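The option values carry a provider prefix ("llamacpp:", "ollama:", or none for the hosted providers). The selection logic is not part of this diff, so the following is a hypothetical sketch of prefix-based dispatch; the field names are invented. Each client strips its own prefix, so the full option value can be passed straight through:

    // Hypothetical routing sketch (not in this diff): pick a vision client
    // from the provider prefix on the submitted model string.
    private IAIVisionClient PickClient(string model) => model switch
    {
        _ when model.StartsWith("llamacpp:") => _llamaCpp,
        _ when model.StartsWith("ollama:") => _ollama,
        _ => _openAI,
    };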
@@ -61,6 +61,8 @@ builder.Services.AddScoped<IPdfToImageConverter, PdfToImageConverter>();
 // AI vision clients
 builder.Services.AddHttpClient<OpenAIVisionClient>();
 builder.Services.AddHttpClient<ClaudeVisionClient>();
+builder.Services.AddHttpClient<OllamaVisionClient>();
+builder.Services.AddHttpClient<LlamaCppVisionClient>();
 builder.Services.AddScoped<IReceiptParser, AIReceiptParser>();

 // AI categorization service
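Both clients read their server address from configuration, falling back to hosts on the author's LAN ("athena.lan"). A hypothetical appsettings.json fragment using the keys the clients actually read ("LlamaCpp:BaseUrl" and "Ollama:BaseUrl") would point them at local servers instead:

    {
      "LlamaCpp": { "BaseUrl": "http://localhost:8080" },
      "Ollama": { "BaseUrl": "http://localhost:11434" }
    }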
@@ -216,4 +216,256 @@ namespace MoneyMap.Services
             return trimmed;
         }
     }
+
+    /// <summary>
+    /// llama.cpp server client using the OpenAI-compatible vision API for local LLM inference.
+    /// </summary>
+    public class LlamaCppVisionClient : IAIVisionClient
+    {
+        private readonly HttpClient _httpClient;
+        private readonly IConfiguration _configuration;
+        private readonly ILogger<LlamaCppVisionClient> _logger;
+
+        public LlamaCppVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<LlamaCppVisionClient> logger)
+        {
+            _httpClient = httpClient;
+            _httpClient.Timeout = TimeSpan.FromMinutes(5); // Local models can be slow
+            _configuration = configuration;
+            _logger = logger;
+        }
+
+        /// <summary>
+        /// Send a text-only prompt to the LLM (no image).
+        /// </summary>
+        public async Task<VisionApiResult> SendTextPromptAsync(string prompt, string? model = null)
+        {
+            var baseUrl = _configuration["LlamaCpp:BaseUrl"] ?? "http://athena.lan:8080";
+            var llamaModel = model ?? "GLM-4.6V-UD-Q4_K_XL-00001-of-00002";
+            if (llamaModel.StartsWith("llamacpp:"))
+                llamaModel = llamaModel[9..];
+
+            _logger.LogInformation("LlamaCpp: Sending text prompt to {BaseUrl} with model {Model}", baseUrl, llamaModel);
+
+            var requestBody = new
+            {
+                model = llamaModel,
+                messages = new[]
+                {
+                    new
+                    {
+                        role = "user",
+                        content = prompt
+                    }
+                },
+                max_tokens = 1024,
+                temperature = 0.1
+            };
+
+            try
+            {
+                var json = JsonSerializer.Serialize(requestBody);
+                var content = new StringContent(json, Encoding.UTF8, "application/json");
+
+                var response = await _httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/v1/chat/completions", content);
+
+                if (!response.IsSuccessStatusCode)
+                {
+                    var errorContent = await response.Content.ReadAsStringAsync();
+                    _logger.LogError("llama.cpp API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
+                    return VisionApiResult.Failure($"llama.cpp API error ({response.StatusCode}): {errorContent}");
+                }
+
+                var responseJson = await response.Content.ReadAsStringAsync();
+                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
+
+                var messageContent = responseObj
+                    .GetProperty("choices")[0]
+                    .GetProperty("message")
+                    .GetProperty("content")
+                    .GetString();
+
+                _logger.LogInformation("LlamaCpp: Text prompt completed successfully");
+                return VisionApiResult.Success(CleanJsonResponse(messageContent));
+            }
+            catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
+            {
+                _logger.LogError("llama.cpp request timed out");
+                return VisionApiResult.Failure("llama.cpp request timed out.");
+            }
+            catch (Exception ex)
+            {
+                _logger.LogError(ex, "llama.cpp API call failed: {Message}", ex.Message);
+                return VisionApiResult.Failure($"llama.cpp API error: {ex.Message}");
+            }
+        }
+
+        public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
+        {
+            var baseUrl = _configuration["LlamaCpp:BaseUrl"] ?? "http://athena.lan:8080";
+
+            // Strip "llamacpp:" prefix if present
+            var llamaModel = model.StartsWith("llamacpp:") ? model[9..] : model;
+
+            _logger.LogInformation("LlamaCpp: Sending request to {BaseUrl} with model {Model}, image size: {Size} bytes",
+                baseUrl, llamaModel, base64Image.Length);
+
+            var requestBody = new
+            {
+                model = llamaModel,
+                messages = new[]
+                {
+                    new
+                    {
+                        role = "user",
+                        content = new object[]
+                        {
+                            new
+                            {
+                                type = "image_url",
+                                image_url = new { url = $"data:{mediaType};base64,{base64Image}" }
+                            },
+                            new { type = "text", text = prompt }
+                        }
+                    }
+                },
+                max_tokens = 4096,
+                temperature = 0.1
+            };
+
+            try
+            {
+                var json = JsonSerializer.Serialize(requestBody);
+                _logger.LogDebug("LlamaCpp: Request payload size: {Size} bytes", json.Length);
+                var content = new StringContent(json, Encoding.UTF8, "application/json");
+
+                var response = await _httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/v1/chat/completions", content);
+
+                if (!response.IsSuccessStatusCode)
+                {
+                    var errorContent = await response.Content.ReadAsStringAsync();
+                    _logger.LogError("llama.cpp API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
+                    return VisionApiResult.Failure($"llama.cpp API error ({response.StatusCode}): {errorContent}");
+                }
+
+                var responseJson = await response.Content.ReadAsStringAsync();
+                _logger.LogDebug("LlamaCpp: Response received, size: {Size} bytes", responseJson.Length);
+                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
+
+                var messageContent = responseObj
+                    .GetProperty("choices")[0]
+                    .GetProperty("message")
+                    .GetProperty("content")
+                    .GetString();
+
+                _logger.LogInformation("LlamaCpp: Successfully parsed response");
+                return VisionApiResult.Success(CleanJsonResponse(messageContent));
+            }
+            catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
+            {
+                _logger.LogError("llama.cpp request timed out after 5 minutes");
+                return VisionApiResult.Failure("llama.cpp request timed out. The model may be too slow or not loaded.");
+            }
+            catch (Exception ex)
+            {
+                _logger.LogError(ex, "llama.cpp Vision API call failed: {Message}", ex.Message);
+                return VisionApiResult.Failure($"llama.cpp API error: {ex.Message}");
+            }
+        }
+
+        private static string CleanJsonResponse(string? content)
+        {
+            var trimmed = content?.Trim() ?? "";
+            if (trimmed.StartsWith("```json"))
+            {
+                trimmed = trimmed.Replace("```json", "").Replace("```", "").Trim();
+            }
+            return trimmed;
+        }
+    }
+
+    /// <summary>
+    /// Ollama Vision API client for local LLM inference.
+    /// </summary>
+    public class OllamaVisionClient : IAIVisionClient
+    {
+        private readonly HttpClient _httpClient;
+        private readonly IConfiguration _configuration;
+        private readonly ILogger<OllamaVisionClient> _logger;
+
+        public OllamaVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<OllamaVisionClient> logger)
+        {
+            _httpClient = httpClient;
+            _httpClient.Timeout = TimeSpan.FromMinutes(5); // Local models can be slow
+            _configuration = configuration;
+            _logger = logger;
+        }
+
+        public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
+        {
+            var baseUrl = _configuration["Ollama:BaseUrl"] ?? "http://athena.lan:11434";
+
+            // Strip "ollama:" prefix if present
+            var ollamaModel = model.StartsWith("ollama:") ? model[7..] : model;
+
+            _logger.LogInformation("Ollama: Sending request to {BaseUrl} with model {Model}, image size: {Size} bytes",
+                baseUrl, ollamaModel, base64Image.Length);
+
+            var requestBody = new
+            {
+                model = ollamaModel,
+                prompt = prompt,
+                images = new[] { base64Image },
+                stream = false,
+                options = new
+                {
+                    temperature = 0.1
+                }
+            };
+
+            try
+            {
+                var json = JsonSerializer.Serialize(requestBody);
+                _logger.LogDebug("Ollama: Request payload size: {Size} bytes", json.Length);
+                var content = new StringContent(json, Encoding.UTF8, "application/json");
+
+                var response = await _httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/api/generate", content);
+
+                if (!response.IsSuccessStatusCode)
+                {
+                    var errorContent = await response.Content.ReadAsStringAsync();
+                    _logger.LogError("Ollama API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
+                    return VisionApiResult.Failure($"Ollama API error ({response.StatusCode}): {errorContent}");
+                }
+
+                var responseJson = await response.Content.ReadAsStringAsync();
+                _logger.LogDebug("Ollama: Response received, size: {Size} bytes", responseJson.Length);
+                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
+
+                var messageContent = responseObj.GetProperty("response").GetString();
+
+                _logger.LogInformation("Ollama: Successfully parsed response");
+                return VisionApiResult.Success(CleanJsonResponse(messageContent));
+            }
+            catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
+            {
+                _logger.LogError("Ollama request timed out after 5 minutes");
+                return VisionApiResult.Failure("Ollama request timed out. The model may be too slow or not loaded.");
+            }
+            catch (Exception ex)
+            {
+                _logger.LogError(ex, "Ollama Vision API call failed: {Message}", ex.Message);
+                return VisionApiResult.Failure($"Ollama API error: {ex.Message}");
+            }
+        }
+
+        private static string CleanJsonResponse(string? content)
+        {
+            var trimmed = content?.Trim() ?? "";
+            if (trimmed.StartsWith("```json"))
+            {
+                trimmed = trimmed.Replace("```json", "").Replace("```", "").Trim();
+            }
+            return trimmed;
+        }
+    }
 }
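The commit message mentions text-only prompt support for LLM-assisted receipt matching. A minimal sketch of how a caller might use it, assuming the VisionApiResult shape sketched earlier; the prompt text, local variables, and injected _llamaCpp field are illustrative, not part of this commit:

    // Hypothetical caller, inside an async method of a matching service:
    // ask the local model which candidate transaction the receipt belongs to.
    var prompt = $"Receipt:\n{receiptSummary}\n\nCandidate transactions:\n{candidateList}\n\n" +
                 "Reply with JSON only: {\"transactionId\": <id or null>}";

    var result = await _llamaCpp.SendTextPromptAsync(prompt);
    if (result.IsSuccess && !string.IsNullOrWhiteSpace(result.Content))
    {
        var match = JsonSerializer.Deserialize<JsonElement>(result.Content);
        // ... read match.GetProperty("transactionId") and link the receipt
    }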
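And the vision path, as the receipt parser would exercise it; the file name and prompt are illustrative:

    // Hypothetical vision call: send a receipt photo to a local Ollama model.
    // The "ollama:" prefix picks the client and is stripped before the request.
    var base64 = Convert.ToBase64String(await File.ReadAllBytesAsync("receipt.jpg"));

    var result = await _ollama.AnalyzeImageAsync(
        base64,
        "image/jpeg",
        "Extract merchant, date, and total as JSON.",
        "ollama:llava");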