Feature: Add local LLM vision clients (llama.cpp and Ollama)

Add LlamaCppVisionClient and OllamaVisionClient for local AI inference
as alternatives to OpenAI and Claude. Includes text-only prompt support
for LLM-assisted receipt matching.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-11 16:53:51 -05:00
parent 2c74e5e403
commit dc56021a77
3 changed files with 261 additions and 0 deletions
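
Both new clients resolve their server endpoint from configuration (LlamaCpp:BaseUrl and Ollama:BaseUrl) and fall back to hard-coded defaults when the key is missing. A minimal appsettings.json sketch, assuming the default hosts and ports used in the code below (actual values depend on where the llama.cpp server and Ollama are running):

{
  "LlamaCpp": {
    "BaseUrl": "http://athena.lan:8080"
  },
  "Ollama": {
    "BaseUrl": "http://athena.lan:11434"
  }
}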


@@ -165,6 +165,13 @@
<div class="mb-2">
<label for="model" class="form-label small">AI Model</label>
<select name="model" id="model" class="form-select form-select-sm">
<optgroup label="Local (llama.cpp)">
<option value="llamacpp:GLM-4.6V-UD-Q4_K_XL-00001-of-00002">GLM-4.6V (Vision)</option>
</optgroup>
<optgroup label="Local (Ollama)">
<option value="ollama:llava">LLaVA (Vision)</option>
<option value="ollama:llava:13b">LLaVA 13B (Vision)</option>
</optgroup>
<optgroup label="OpenAI"> <optgroup label="OpenAI">
<option value="gpt-4o-mini" selected>GPT-4o Mini (Fast & Cheap)</option> <option value="gpt-4o-mini" selected>GPT-4o Mini (Fast & Cheap)</option>
<option value="gpt-4o">GPT-4o (Smarter)</option> <option value="gpt-4o">GPT-4o (Smarter)</option>


@@ -61,6 +61,8 @@ builder.Services.AddScoped<IPdfToImageConverter, PdfToImageConverter>();
// AI vision clients
builder.Services.AddHttpClient<OpenAIVisionClient>();
builder.Services.AddHttpClient<ClaudeVisionClient>();
builder.Services.AddHttpClient<OllamaVisionClient>();
builder.Services.AddHttpClient<LlamaCppVisionClient>();
builder.Services.AddScoped<IReceiptParser, AIReceiptParser>();
// AI categorization service
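
The commit registers both local clients as typed HttpClients but does not show how AIReceiptParser chooses between them. The model values submitted from the form carry "llamacpp:" and "ollama:" prefixes, which the clients strip themselves, so one plausible dispatch is a prefix check; the resolver below is a hypothetical sketch, not code from this commit:

// Hypothetical sketch: route a submitted model value to the matching client by
// prefix. The class name and selection logic are assumptions; only the
// registrations above are part of the commit.
public class VisionClientResolver
{
    private readonly LlamaCppVisionClient _llamaCpp;
    private readonly OllamaVisionClient _ollama;
    private readonly OpenAIVisionClient _openAI;

    public VisionClientResolver(LlamaCppVisionClient llamaCpp, OllamaVisionClient ollama, OpenAIVisionClient openAI)
    {
        _llamaCpp = llamaCpp;
        _ollama = ollama;
        _openAI = openAI;
    }

    public IAIVisionClient Resolve(string model)
    {
        if (model.StartsWith("llamacpp:")) return _llamaCpp;
        if (model.StartsWith("ollama:")) return _ollama;
        return _openAI; // existing cloud clients handle the non-prefixed model ids
    }
}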


@@ -216,4 +216,256 @@ namespace MoneyMap.Services
return trimmed;
}
}
/// <summary>
/// llama.cpp server client using OpenAI-compatible vision API for local LLM inference.
/// </summary>
public class LlamaCppVisionClient : IAIVisionClient
{
private readonly HttpClient _httpClient;
private readonly IConfiguration _configuration;
private readonly ILogger<LlamaCppVisionClient> _logger;
public LlamaCppVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<LlamaCppVisionClient> logger)
{
_httpClient = httpClient;
_httpClient.Timeout = TimeSpan.FromMinutes(5); // Local models can be slow
_configuration = configuration;
_logger = logger;
}
/// <summary>
/// Send a text-only prompt to the LLM (no image).
/// </summary>
public async Task<VisionApiResult> SendTextPromptAsync(string prompt, string? model = null)
{
var baseUrl = _configuration["LlamaCpp:BaseUrl"] ?? "http://athena.lan:11434";
var llamaModel = model ?? "GLM-4.6V-UD-Q4_K_XL-00001-of-00002";
if (llamaModel.StartsWith("llamacpp:"))
llamaModel = llamaModel[9..];
_logger.LogInformation("LlamaCpp: Sending text prompt to {BaseUrl} with model {Model}", baseUrl, llamaModel);
var requestBody = new
{
model = llamaModel,
messages = new[]
{
new
{
role = "user",
content = prompt
}
},
max_tokens = 1024,
temperature = 0.1
};
try
{
var json = JsonSerializer.Serialize(requestBody);
var content = new StringContent(json, Encoding.UTF8, "application/json");
var response = await _httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/v1/chat/completions", content);
if (!response.IsSuccessStatusCode)
{
var errorContent = await response.Content.ReadAsStringAsync();
_logger.LogError("llama.cpp API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
return VisionApiResult.Failure($"llama.cpp API error ({response.StatusCode}): {errorContent}");
}
var responseJson = await response.Content.ReadAsStringAsync();
var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
var messageContent = responseObj
.GetProperty("choices")[0]
.GetProperty("message")
.GetProperty("content")
.GetString();
_logger.LogInformation("LlamaCpp: Text prompt completed successfully");
return VisionApiResult.Success(CleanJsonResponse(messageContent));
}
catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
{
_logger.LogError("llama.cpp request timed out");
return VisionApiResult.Failure("llama.cpp request timed out.");
}
catch (Exception ex)
{
_logger.LogError(ex, "llama.cpp API call failed: {Message}", ex.Message);
return VisionApiResult.Failure($"llama.cpp API error: {ex.Message}");
}
}
public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
{
var baseUrl = _configuration["LlamaCpp:BaseUrl"] ?? "http://athena.lan:8080";
// Strip "llamacpp:" prefix if present
var llamaModel = model.StartsWith("llamacpp:") ? model[9..] : model;
_logger.LogInformation("LlamaCpp: Sending request to {BaseUrl} with model {Model}, image size: {Size} bytes",
baseUrl, llamaModel, base64Image.Length);
var requestBody = new
{
model = llamaModel,
messages = new[]
{
new
{
role = "user",
content = new object[]
{
new
{
type = "image_url",
image_url = new { url = $"data:{mediaType};base64,{base64Image}" }
},
new { type = "text", text = prompt }
}
}
},
max_tokens = 4096,
temperature = 0.1
};
try
{
var json = JsonSerializer.Serialize(requestBody);
_logger.LogDebug("LlamaCpp: Request payload size: {Size} bytes", json.Length);
var content = new StringContent(json, Encoding.UTF8, "application/json");
var response = await _httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/v1/chat/completions", content);
if (!response.IsSuccessStatusCode)
{
var errorContent = await response.Content.ReadAsStringAsync();
_logger.LogError("llama.cpp API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
return VisionApiResult.Failure($"llama.cpp API error ({response.StatusCode}): {errorContent}");
}
var responseJson = await response.Content.ReadAsStringAsync();
_logger.LogDebug("LlamaCpp: Response received, size: {Size} bytes", responseJson.Length);
var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
var messageContent = responseObj
.GetProperty("choices")[0]
.GetProperty("message")
.GetProperty("content")
.GetString();
_logger.LogInformation("LlamaCpp: Successfully parsed response");
return VisionApiResult.Success(CleanJsonResponse(messageContent));
}
catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
{
_logger.LogError("llama.cpp request timed out after 5 minutes");
return VisionApiResult.Failure("llama.cpp request timed out. The model may be too slow or not loaded.");
}
catch (Exception ex)
{
_logger.LogError(ex, "llama.cpp Vision API call failed: {Message}", ex.Message);
return VisionApiResult.Failure($"llama.cpp API error: {ex.Message}");
}
}
private static string CleanJsonResponse(string? content)
{
var trimmed = content?.Trim() ?? "";
if (trimmed.StartsWith("```json"))
{
trimmed = trimmed.Replace("```json", "").Replace("```", "").Trim();
}
return trimmed;
}
}
/// <summary>
/// Ollama Vision API client for local LLM inference.
/// </summary>
public class OllamaVisionClient : IAIVisionClient
{
private readonly HttpClient _httpClient;
private readonly IConfiguration _configuration;
private readonly ILogger<OllamaVisionClient> _logger;
public OllamaVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<OllamaVisionClient> logger)
{
_httpClient = httpClient;
_httpClient.Timeout = TimeSpan.FromMinutes(5); // Local models can be slow
_configuration = configuration;
_logger = logger;
}
public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
{
var baseUrl = _configuration["Ollama:BaseUrl"] ?? "http://athena.lan:11434";
// Strip "ollama:" prefix if present
var ollamaModel = model.StartsWith("ollama:") ? model[7..] : model;
_logger.LogInformation("Ollama: Sending request to {BaseUrl} with model {Model}, image size: {Size} bytes",
baseUrl, ollamaModel, base64Image.Length);
var requestBody = new
{
model = ollamaModel,
prompt = prompt,
images = new[] { base64Image },
stream = false,
options = new
{
temperature = 0.1
}
};
try
{
var json = JsonSerializer.Serialize(requestBody);
_logger.LogDebug("Ollama: Request payload size: {Size} bytes", json.Length);
var content = new StringContent(json, Encoding.UTF8, "application/json");
var response = await _httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/api/generate", content);
if (!response.IsSuccessStatusCode)
{
var errorContent = await response.Content.ReadAsStringAsync();
_logger.LogError("Ollama API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
return VisionApiResult.Failure($"Ollama API error ({response.StatusCode}): {errorContent}");
}
var responseJson = await response.Content.ReadAsStringAsync();
_logger.LogDebug("Ollama: Response received, size: {Size} bytes", responseJson.Length);
var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
var messageContent = responseObj.GetProperty("response").GetString();
_logger.LogInformation("Ollama: Successfully parsed response");
return VisionApiResult.Success(CleanJsonResponse(messageContent));
}
catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
{
_logger.LogError("Ollama request timed out after 5 minutes");
return VisionApiResult.Failure("Ollama request timed out. The model may be too slow or not loaded.");
}
catch (Exception ex)
{
_logger.LogError(ex, "Ollama Vision API call failed: {Message}", ex.Message);
return VisionApiResult.Failure($"Ollama API error: {ex.Message}");
}
}
private static string CleanJsonResponse(string? content)
{
var trimmed = content?.Trim() ?? "";
if (trimmed.StartsWith("```json"))
{
trimmed = trimmed.Replace("```json", "").Replace("```", "").Trim();
}
return trimmed;
}
}
}
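
The commit message also mentions text-only prompt support for LLM-assisted receipt matching; in this diff only LlamaCppVisionClient exposes SendTextPromptAsync. A hypothetical caller sketch, assuming the surrounding service and prompt wording (neither appears in this commit):

// Hypothetical usage sketch: ask the local model which candidate transaction a
// receipt belongs to via the new text-only path. The service name, prompt text,
// and handling of the result are assumptions, not part of this commit.
public class ReceiptMatchSketch
{
    private readonly LlamaCppVisionClient _client;

    public ReceiptMatchSketch(LlamaCppVisionClient client) => _client = client;

    public async Task<VisionApiResult> MatchAsync(string receiptSummary, IEnumerable<string> candidateTransactions)
    {
        var prompt =
            "Given this receipt:\n" + receiptSummary +
            "\n\nAnd these candidate transactions:\n" + string.Join("\n", candidateTransactions) +
            "\n\nReply with JSON like {\"bestMatchIndex\": 0} or {\"bestMatchIndex\": null}.";

        // The members of VisionApiResult are not shown in this diff, so the result
        // is returned as-is for the caller to inspect.
        return await _client.SendTextPromptAsync(prompt);
    }
}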