Feature: Add local LLM vision clients (llama.cpp and Ollama)
Add LlamaCppVisionClient and OllamaVisionClient for local AI inference as alternatives to OpenAI and Claude. Includes text-only prompt support for LLM-assisted receipt matching.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
@@ -165,6 +165,13 @@
<div class="mb-2">
    <label for="model" class="form-label small">AI Model</label>
    <select name="model" id="model" class="form-select form-select-sm">
        <optgroup label="Local (llama.cpp)">
            <option value="llamacpp:GLM-4.6V-UD-Q4_K_XL-00001-of-00002">GLM-4.6V (Vision)</option>
        </optgroup>
        <optgroup label="Local (Ollama)">
            <option value="ollama:llava">LLaVA (Vision)</option>
            <option value="ollama:llava:13b">LLaVA 13B (Vision)</option>
        </optgroup>
        <optgroup label="OpenAI">
            <option value="gpt-4o-mini" selected>GPT-4o Mini (Fast & Cheap)</option>
            <option value="gpt-4o">GPT-4o (Smarter)</option>
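The new option values carry a provider prefix ("llamacpp:" or "ollama:") that each local client strips before calling its backend. How the posted value is routed to a concrete client is not shown in this diff (it is assumed to live in AIReceiptParser); the sketch below is a hypothetical illustration of that dispatch, with the class and method names invented for the example.

// Hypothetical routing sketch (not part of this commit): pick a vision client
// based on the "provider:" prefix carried by the selected <option> value.
using System;
using Microsoft.Extensions.DependencyInjection;
using MoneyMap.Services;

public static class VisionClientRouting
{
    public static IAIVisionClient Resolve(string model, IServiceProvider services)
    {
        if (model.StartsWith("llamacpp:"))
            return services.GetRequiredService<LlamaCppVisionClient>();
        if (model.StartsWith("ollama:"))
            return services.GetRequiredService<OllamaVisionClient>();
        return services.GetRequiredService<OpenAIVisionClient>(); // "gpt-4o", "gpt-4o-mini", ...
    }
}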
@@ -61,6 +61,8 @@ builder.Services.AddScoped<IPdfToImageConverter, PdfToImageConverter>();
// AI vision clients
builder.Services.AddHttpClient<OpenAIVisionClient>();
builder.Services.AddHttpClient<ClaudeVisionClient>();
builder.Services.AddHttpClient<OllamaVisionClient>();
builder.Services.AddHttpClient<LlamaCppVisionClient>();
builder.Services.AddScoped<IReceiptParser, AIReceiptParser>();

// AI categorization service
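Both local clients read their backend address from IConfiguration ("LlamaCpp:BaseUrl" and "Ollama:BaseUrl") and fall back to hard-coded hosts when the keys are missing. A minimal sketch of supplying those keys follows; the hostnames are placeholders, and the in-memory form stands in for whatever appsettings.json layout the project actually uses.

// Hypothetical configuration sketch; hostnames/ports are placeholders
// (8080 and 11434 are the usual llama.cpp server and Ollama defaults).
using System.Collections.Generic;
using Microsoft.Extensions.Configuration;

var config = new ConfigurationBuilder()
    .AddInMemoryCollection(new Dictionary<string, string?>
    {
        ["LlamaCpp:BaseUrl"] = "http://localhost:8080",
        ["Ollama:BaseUrl"] = "http://localhost:11434",
    })
    .Build();

// The clients read these keys, e.g. _configuration["Ollama:BaseUrl"].
System.Console.WriteLine(config["Ollama:BaseUrl"]);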
@@ -216,4 +216,256 @@ namespace MoneyMap.Services
            return trimmed;
        }
    }

    /// <summary>
    /// llama.cpp server client using the OpenAI-compatible vision API for local LLM inference.
    /// </summary>
    public class LlamaCppVisionClient : IAIVisionClient
    {
        private readonly HttpClient _httpClient;
        private readonly IConfiguration _configuration;
        private readonly ILogger<LlamaCppVisionClient> _logger;

        public LlamaCppVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<LlamaCppVisionClient> logger)
        {
            _httpClient = httpClient;
            _httpClient.Timeout = TimeSpan.FromMinutes(5); // Local models can be slow
            _configuration = configuration;
            _logger = logger;
        }

        /// <summary>
        /// Send a text-only prompt to the LLM (no image).
        /// </summary>
        public async Task<VisionApiResult> SendTextPromptAsync(string prompt, string? model = null)
        {
            var baseUrl = _configuration["LlamaCpp:BaseUrl"] ?? "http://athena.lan:11434"; // note: fallback port differs from AnalyzeImageAsync's :8080 default
            var llamaModel = model ?? "GLM-4.6V-UD-Q4_K_XL-00001-of-00002";
            if (llamaModel.StartsWith("llamacpp:"))
                llamaModel = llamaModel[9..];

            _logger.LogInformation("LlamaCpp: Sending text prompt to {BaseUrl} with model {Model}", baseUrl, llamaModel);

            var requestBody = new
            {
                model = llamaModel,
                messages = new[]
                {
                    new
                    {
                        role = "user",
                        content = prompt
                    }
                },
                max_tokens = 1024,
                temperature = 0.1
            };

            try
            {
                var json = JsonSerializer.Serialize(requestBody);
                var content = new StringContent(json, Encoding.UTF8, "application/json");

                var response = await _httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/v1/chat/completions", content);

                if (!response.IsSuccessStatusCode)
                {
                    var errorContent = await response.Content.ReadAsStringAsync();
                    _logger.LogError("llama.cpp API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                    return VisionApiResult.Failure($"llama.cpp API error ({response.StatusCode}): {errorContent}");
                }

                var responseJson = await response.Content.ReadAsStringAsync();
                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);

                var messageContent = responseObj
                    .GetProperty("choices")[0]
                    .GetProperty("message")
                    .GetProperty("content")
                    .GetString();

                _logger.LogInformation("LlamaCpp: Text prompt completed successfully");
                return VisionApiResult.Success(CleanJsonResponse(messageContent));
            }
            catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
            {
                _logger.LogError("llama.cpp request timed out");
                return VisionApiResult.Failure("llama.cpp request timed out.");
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "llama.cpp API call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"llama.cpp API error: {ex.Message}");
            }
        }

        public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
        {
            var baseUrl = _configuration["LlamaCpp:BaseUrl"] ?? "http://athena.lan:8080";

            // Strip "llamacpp:" prefix if present
            var llamaModel = model.StartsWith("llamacpp:") ? model[9..] : model;

            _logger.LogInformation("LlamaCpp: Sending request to {BaseUrl} with model {Model}, image size: {Size} bytes",
                baseUrl, llamaModel, base64Image.Length);

            var requestBody = new
            {
                model = llamaModel,
                messages = new[]
                {
                    new
                    {
                        role = "user",
                        content = new object[]
                        {
                            new
                            {
                                type = "image_url",
                                image_url = new { url = $"data:{mediaType};base64,{base64Image}" }
                            },
                            new { type = "text", text = prompt }
                        }
                    }
                },
                max_tokens = 4096,
                temperature = 0.1
            };

            try
            {
                var json = JsonSerializer.Serialize(requestBody);
                _logger.LogDebug("LlamaCpp: Request payload size: {Size} bytes", json.Length);
                var content = new StringContent(json, Encoding.UTF8, "application/json");

                var response = await _httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/v1/chat/completions", content);

                if (!response.IsSuccessStatusCode)
                {
                    var errorContent = await response.Content.ReadAsStringAsync();
                    _logger.LogError("llama.cpp API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                    return VisionApiResult.Failure($"llama.cpp API error ({response.StatusCode}): {errorContent}");
                }

                var responseJson = await response.Content.ReadAsStringAsync();
                _logger.LogDebug("LlamaCpp: Response received, size: {Size} bytes", responseJson.Length);
                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);

                var messageContent = responseObj
                    .GetProperty("choices")[0]
                    .GetProperty("message")
                    .GetProperty("content")
                    .GetString();

                _logger.LogInformation("LlamaCpp: Successfully parsed response");
                return VisionApiResult.Success(CleanJsonResponse(messageContent));
            }
            catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
            {
                _logger.LogError("llama.cpp request timed out after 5 minutes");
                return VisionApiResult.Failure("llama.cpp request timed out. The model may be too slow or not loaded.");
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "llama.cpp Vision API call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"llama.cpp API error: {ex.Message}");
            }
        }

        private static string CleanJsonResponse(string? content)
        {
            var trimmed = content?.Trim() ?? "";
            if (trimmed.StartsWith("```json"))
            {
                trimmed = trimmed.Replace("```json", "").Replace("```", "").Trim();
            }
            return trimmed;
        }
    }
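
    // Illustrative sketch (hypothetical, not from this diff): the commit message
    // mentions text-only prompts for LLM-assisted receipt matching. A caller might
    // use SendTextPromptAsync roughly like this; the class name, prompt wording,
    // and candidate-transaction data below are invented for the example.
    public static class ReceiptMatchPromptExample
    {
        public static Task<VisionApiResult> SuggestMatchAsync(LlamaCppVisionClient client)
        {
            var prompt =
                "A receipt shows merchant 'ACME MARKET', total 42.17, date 2024-05-03. " +
                "Candidate transactions: " +
                "[{\"id\":1,\"payee\":\"Acme Market\",\"amount\":42.17}, " +
                "{\"id\":2,\"payee\":\"Acme Fuel\",\"amount\":55.00}]. " +
                "Reply with JSON only: {\"matchId\": <id or null>}.";

            // Omitting the model uses the client's default; a "llamacpp:"-prefixed
            // value from the dropdown would also work, since the prefix is stripped.
            return client.SendTextPromptAsync(prompt);
        }
    }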

    /// <summary>
    /// Ollama Vision API client for local LLM inference.
    /// </summary>
    public class OllamaVisionClient : IAIVisionClient
    {
        private readonly HttpClient _httpClient;
        private readonly IConfiguration _configuration;
        private readonly ILogger<OllamaVisionClient> _logger;

        public OllamaVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<OllamaVisionClient> logger)
        {
            _httpClient = httpClient;
            _httpClient.Timeout = TimeSpan.FromMinutes(5); // Local models can be slow
            _configuration = configuration;
            _logger = logger;
        }

        public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
        {
            var baseUrl = _configuration["Ollama:BaseUrl"] ?? "http://athena.lan:11434";

            // Strip "ollama:" prefix if present
            var ollamaModel = model.StartsWith("ollama:") ? model[7..] : model;

            _logger.LogInformation("Ollama: Sending request to {BaseUrl} with model {Model}, image size: {Size} bytes",
                baseUrl, ollamaModel, base64Image.Length);

            var requestBody = new
            {
                model = ollamaModel,
                prompt = prompt,
                images = new[] { base64Image },
                stream = false,
                options = new
                {
                    temperature = 0.1
                }
            };

            try
            {
                var json = JsonSerializer.Serialize(requestBody);
                _logger.LogDebug("Ollama: Request payload size: {Size} bytes", json.Length);
                var content = new StringContent(json, Encoding.UTF8, "application/json");

                var response = await _httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/api/generate", content);

                if (!response.IsSuccessStatusCode)
                {
                    var errorContent = await response.Content.ReadAsStringAsync();
                    _logger.LogError("Ollama API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                    return VisionApiResult.Failure($"Ollama API error ({response.StatusCode}): {errorContent}");
                }

                var responseJson = await response.Content.ReadAsStringAsync();
                _logger.LogDebug("Ollama: Response received, size: {Size} bytes", responseJson.Length);
                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);

                var messageContent = responseObj.GetProperty("response").GetString();

                _logger.LogInformation("Ollama: Successfully parsed response");
                return VisionApiResult.Success(CleanJsonResponse(messageContent));
            }
            catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
            {
                _logger.LogError("Ollama request timed out after 5 minutes");
                return VisionApiResult.Failure("Ollama request timed out. The model may be too slow or not loaded.");
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Ollama Vision API call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"Ollama API error: {ex.Message}");
            }
        }

        private static string CleanJsonResponse(string? content)
        {
            var trimmed = content?.Trim() ?? "";
            if (trimmed.StartsWith("```json"))
            {
                trimmed = trimmed.Replace("```json", "").Replace("```", "").Trim();
            }
            return trimmed;
        }
    }
}
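For the image path, a hedged usage sketch follows. It is not part of the commit: the file name, prompt, and model tag are placeholders, and in the application the call is presumably made by AIReceiptParser rather than directly.

// Hypothetical, self-contained usage sketch for the new Ollama client.
using System;
using System.IO;
using System.Threading.Tasks;
using MoneyMap.Services;

public static class OllamaUsageExample
{
    public static async Task RunAsync(OllamaVisionClient client)
    {
        // Base64-encode a receipt image from disk; path and prompt are placeholders.
        var imageBytes = await File.ReadAllBytesAsync("receipt.jpg");
        var base64Image = Convert.ToBase64String(imageBytes);

        // The "ollama:" prefix is stripped by the client before calling /api/generate.
        var result = await client.AnalyzeImageAsync(
            base64Image,
            "image/jpeg",
            "Extract the merchant, date, and total from this receipt as JSON.",
            "ollama:llava");

        // result carries the model's JSON output (or a failure message).
    }
}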