Feature: Add local LLM vision clients (llama.cpp and Ollama)
Add LlamaCppVisionClient and OllamaVisionClient for local AI inference as alternatives to OpenAI and Claude. Includes text-only prompt support for LLM-assisted receipt matching.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
@@ -165,6 +165,13 @@
<div class="mb-2">
    <label for="model" class="form-label small">AI Model</label>
    <select name="model" id="model" class="form-select form-select-sm">
        <optgroup label="Local (llama.cpp)">
            <option value="llamacpp:GLM-4.6V-UD-Q4_K_XL-00001-of-00002">GLM-4.6V (Vision)</option>
        </optgroup>
        <optgroup label="Local (Ollama)">
            <option value="ollama:llava">LLaVA (Vision)</option>
            <option value="ollama:llava:13b">LLaVA 13B (Vision)</option>
        </optgroup>
        <optgroup label="OpenAI">
            <option value="gpt-4o-mini" selected>GPT-4o Mini (Fast & Cheap)</option>
            <option value="gpt-4o">GPT-4o (Smarter)</option>
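The new option values carry a provider prefix ("llamacpp:" or "ollama:") that each local client strips before calling its backend. How the posted value is routed to a concrete client is not shown in this diff (it is assumed to live in AIReceiptParser); the sketch below is a hypothetical illustration of that dispatch, with the class and method names invented for the example.

// Hypothetical routing sketch (not part of this commit): pick a vision client
// based on the "provider:" prefix carried by the selected <option> value.
using System;
using Microsoft.Extensions.DependencyInjection;
using MoneyMap.Services;

public static class VisionClientRouting
{
    public static IAIVisionClient Resolve(string model, IServiceProvider services)
    {
        if (model.StartsWith("llamacpp:"))
            return services.GetRequiredService<LlamaCppVisionClient>();
        if (model.StartsWith("ollama:"))
            return services.GetRequiredService<OllamaVisionClient>();
        return services.GetRequiredService<OpenAIVisionClient>(); // "gpt-4o", "gpt-4o-mini", ...
    }
}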
@@ -61,6 +61,8 @@ builder.Services.AddScoped<IPdfToImageConverter, PdfToImageConverter>();
// AI vision clients
builder.Services.AddHttpClient<OpenAIVisionClient>();
builder.Services.AddHttpClient<ClaudeVisionClient>();
builder.Services.AddHttpClient<OllamaVisionClient>();
builder.Services.AddHttpClient<LlamaCppVisionClient>();
builder.Services.AddScoped<IReceiptParser, AIReceiptParser>();

// AI categorization service
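Both local clients read their backend address from IConfiguration ("LlamaCpp:BaseUrl" and "Ollama:BaseUrl") and fall back to hard-coded hosts when the keys are missing. A minimal sketch of supplying those keys follows; the hostnames are placeholders, and the in-memory form stands in for whatever appsettings.json layout the project actually uses.

// Hypothetical configuration sketch; hostnames/ports are placeholders
// (8080 and 11434 are the usual llama.cpp server and Ollama defaults).
using System.Collections.Generic;
using Microsoft.Extensions.Configuration;

var config = new ConfigurationBuilder()
    .AddInMemoryCollection(new Dictionary<string, string?>
    {
        ["LlamaCpp:BaseUrl"] = "http://localhost:8080",
        ["Ollama:BaseUrl"] = "http://localhost:11434",
    })
    .Build();

// The clients read these keys, e.g. _configuration["Ollama:BaseUrl"].
System.Console.WriteLine(config["Ollama:BaseUrl"]);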
@@ -216,4 +216,256 @@ namespace MoneyMap.Services
            return trimmed;
        }
    }

    /// <summary>
    /// llama.cpp server client using the OpenAI-compatible vision API for local LLM inference.
    /// </summary>
    public class LlamaCppVisionClient : IAIVisionClient
    {
        private readonly HttpClient _httpClient;
        private readonly IConfiguration _configuration;
        private readonly ILogger<LlamaCppVisionClient> _logger;

        public LlamaCppVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<LlamaCppVisionClient> logger)
        {
            _httpClient = httpClient;
            _httpClient.Timeout = TimeSpan.FromMinutes(5); // Local models can be slow
            _configuration = configuration;
            _logger = logger;
        }

        /// <summary>
        /// Send a text-only prompt to the LLM (no image).
        /// </summary>
        public async Task<VisionApiResult> SendTextPromptAsync(string prompt, string? model = null)
        {
            var baseUrl = _configuration["LlamaCpp:BaseUrl"] ?? "http://athena.lan:11434"; // note: fallback port differs from AnalyzeImageAsync's :8080 default
            var llamaModel = model ?? "GLM-4.6V-UD-Q4_K_XL-00001-of-00002";
            if (llamaModel.StartsWith("llamacpp:"))
                llamaModel = llamaModel[9..];

            _logger.LogInformation("LlamaCpp: Sending text prompt to {BaseUrl} with model {Model}", baseUrl, llamaModel);

            var requestBody = new
            {
                model = llamaModel,
                messages = new[]
                {
                    new
                    {
                        role = "user",
                        content = prompt
                    }
                },
                max_tokens = 1024,
                temperature = 0.1
            };

            try
            {
                var json = JsonSerializer.Serialize(requestBody);
                var content = new StringContent(json, Encoding.UTF8, "application/json");

                var response = await _httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/v1/chat/completions", content);

                if (!response.IsSuccessStatusCode)
                {
                    var errorContent = await response.Content.ReadAsStringAsync();
                    _logger.LogError("llama.cpp API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                    return VisionApiResult.Failure($"llama.cpp API error ({response.StatusCode}): {errorContent}");
                }

                var responseJson = await response.Content.ReadAsStringAsync();
                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);

                var messageContent = responseObj
                    .GetProperty("choices")[0]
                    .GetProperty("message")
                    .GetProperty("content")
                    .GetString();

                _logger.LogInformation("LlamaCpp: Text prompt completed successfully");
                return VisionApiResult.Success(CleanJsonResponse(messageContent));
            }
            catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
            {
                _logger.LogError("llama.cpp request timed out");
                return VisionApiResult.Failure("llama.cpp request timed out.");
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "llama.cpp API call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"llama.cpp API error: {ex.Message}");
            }
        }

        public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
        {
            var baseUrl = _configuration["LlamaCpp:BaseUrl"] ?? "http://athena.lan:8080";

            // Strip "llamacpp:" prefix if present
            var llamaModel = model.StartsWith("llamacpp:") ? model[9..] : model;

            _logger.LogInformation("LlamaCpp: Sending request to {BaseUrl} with model {Model}, image size: {Size} bytes",
                baseUrl, llamaModel, base64Image.Length);

            var requestBody = new
            {
                model = llamaModel,
                messages = new[]
                {
                    new
                    {
                        role = "user",
                        content = new object[]
                        {
                            new
                            {
                                type = "image_url",
                                image_url = new { url = $"data:{mediaType};base64,{base64Image}" }
                            },
                            new { type = "text", text = prompt }
                        }
                    }
                },
                max_tokens = 4096,
                temperature = 0.1
            };

            try
            {
                var json = JsonSerializer.Serialize(requestBody);
                _logger.LogDebug("LlamaCpp: Request payload size: {Size} bytes", json.Length);
                var content = new StringContent(json, Encoding.UTF8, "application/json");

                var response = await _httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/v1/chat/completions", content);

                if (!response.IsSuccessStatusCode)
                {
                    var errorContent = await response.Content.ReadAsStringAsync();
                    _logger.LogError("llama.cpp API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                    return VisionApiResult.Failure($"llama.cpp API error ({response.StatusCode}): {errorContent}");
                }

                var responseJson = await response.Content.ReadAsStringAsync();
                _logger.LogDebug("LlamaCpp: Response received, size: {Size} bytes", responseJson.Length);
                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);

                var messageContent = responseObj
                    .GetProperty("choices")[0]
                    .GetProperty("message")
                    .GetProperty("content")
                    .GetString();

                _logger.LogInformation("LlamaCpp: Successfully parsed response");
                return VisionApiResult.Success(CleanJsonResponse(messageContent));
            }
            catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
            {
                _logger.LogError("llama.cpp request timed out after 5 minutes");
                return VisionApiResult.Failure("llama.cpp request timed out. The model may be too slow or not loaded.");
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "llama.cpp Vision API call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"llama.cpp API error: {ex.Message}");
            }
        }

        private static string CleanJsonResponse(string? content)
        {
            var trimmed = content?.Trim() ?? "";
            if (trimmed.StartsWith("```json"))
            {
                trimmed = trimmed.Replace("```json", "").Replace("```", "").Trim();
            }
            return trimmed;
        }
    }
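
    // Illustrative sketch (hypothetical, not from this diff): the commit message
    // mentions text-only prompts for LLM-assisted receipt matching. A caller might
    // use SendTextPromptAsync roughly like this; the class name, prompt wording,
    // and candidate-transaction data below are invented for the example.
    public static class ReceiptMatchPromptExample
    {
        public static Task<VisionApiResult> SuggestMatchAsync(LlamaCppVisionClient client)
        {
            var prompt =
                "A receipt shows merchant 'ACME MARKET', total 42.17, date 2024-05-03. " +
                "Candidate transactions: " +
                "[{\"id\":1,\"payee\":\"Acme Market\",\"amount\":42.17}, " +
                "{\"id\":2,\"payee\":\"Acme Fuel\",\"amount\":55.00}]. " +
                "Reply with JSON only: {\"matchId\": <id or null>}.";

            // Omitting the model uses the client's default; a "llamacpp:"-prefixed
            // value from the dropdown would also work, since the prefix is stripped.
            return client.SendTextPromptAsync(prompt);
        }
    }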

    /// <summary>
    /// Ollama Vision API client for local LLM inference.
    /// </summary>
    public class OllamaVisionClient : IAIVisionClient
    {
        private readonly HttpClient _httpClient;
        private readonly IConfiguration _configuration;
        private readonly ILogger<OllamaVisionClient> _logger;

        public OllamaVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<OllamaVisionClient> logger)
        {
            _httpClient = httpClient;
            _httpClient.Timeout = TimeSpan.FromMinutes(5); // Local models can be slow
            _configuration = configuration;
            _logger = logger;
        }

        public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
        {
            var baseUrl = _configuration["Ollama:BaseUrl"] ?? "http://athena.lan:11434";

            // Strip "ollama:" prefix if present
            var ollamaModel = model.StartsWith("ollama:") ? model[7..] : model;

            _logger.LogInformation("Ollama: Sending request to {BaseUrl} with model {Model}, image size: {Size} bytes",
                baseUrl, ollamaModel, base64Image.Length);

            var requestBody = new
            {
                model = ollamaModel,
                prompt = prompt,
                images = new[] { base64Image },
                stream = false,
                options = new
                {
                    temperature = 0.1
                }
            };

            try
            {
                var json = JsonSerializer.Serialize(requestBody);
                _logger.LogDebug("Ollama: Request payload size: {Size} bytes", json.Length);
                var content = new StringContent(json, Encoding.UTF8, "application/json");

                var response = await _httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/api/generate", content);

                if (!response.IsSuccessStatusCode)
                {
                    var errorContent = await response.Content.ReadAsStringAsync();
                    _logger.LogError("Ollama API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                    return VisionApiResult.Failure($"Ollama API error ({response.StatusCode}): {errorContent}");
                }

                var responseJson = await response.Content.ReadAsStringAsync();
                _logger.LogDebug("Ollama: Response received, size: {Size} bytes", responseJson.Length);
                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);

                var messageContent = responseObj.GetProperty("response").GetString();

                _logger.LogInformation("Ollama: Successfully parsed response");
                return VisionApiResult.Success(CleanJsonResponse(messageContent));
            }
            catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
            {
                _logger.LogError("Ollama request timed out after 5 minutes");
                return VisionApiResult.Failure("Ollama request timed out. The model may be too slow or not loaded.");
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Ollama Vision API call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"Ollama API error: {ex.Message}");
            }
        }

        private static string CleanJsonResponse(string? content)
        {
            var trimmed = content?.Trim() ?? "";
            if (trimmed.StartsWith("```json"))
            {
                trimmed = trimmed.Replace("```json", "").Replace("```", "").Trim();
            }
            return trimmed;
        }
    }
}
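For the image path, a hedged usage sketch follows. It is not part of the commit: the file name, prompt, and model tag are placeholders, and in the application the call is presumably made by AIReceiptParser rather than directly.

// Hypothetical, self-contained usage sketch for the new Ollama client.
using System;
using System.IO;
using System.Threading.Tasks;
using MoneyMap.Services;

public static class OllamaUsageExample
{
    public static async Task RunAsync(OllamaVisionClient client)
    {
        // Base64-encode a receipt image from disk; path and prompt are placeholders.
        var imageBytes = await File.ReadAllBytesAsync("receipt.jpg");
        var base64Image = Convert.ToBase64String(imageBytes);

        // The "ollama:" prefix is stripped by the client before calling /api/generate.
        var result = await client.AnalyzeImageAsync(
            base64Image,
            "image/jpeg",
            "Extract the merchant, date, and total from this receipt as JSON.",
            "ollama:llava");

        // result carries the model's JSON output (or a failure message).
    }
}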