Feature: Add local LLM vision clients (llama.cpp and Ollama)

Add LlamaCppVisionClient and OllamaVisionClient for local AI inference
as alternatives to OpenAI and Claude. Includes text-only prompt support
for LLM-assisted receipt matching.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-11 16:53:51 -05:00
parent 2c74e5e403
commit dc56021a77
3 changed files with 261 additions and 0 deletions
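
Both new clients resolve their server endpoint from configuration (LlamaCpp:BaseUrl and Ollama:BaseUrl) and fall back to hard-coded defaults when the key is missing. A minimal appsettings.json sketch, assuming the default hosts and ports used in the code below (actual values depend on where the llama.cpp server and Ollama are running):

{
  "LlamaCpp": {
    "BaseUrl": "http://athena.lan:8080"
  },
  "Ollama": {
    "BaseUrl": "http://athena.lan:11434"
  }
}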


@@ -165,6 +165,13 @@
<div class="mb-2">
<label for="model" class="form-label small">AI Model</label>
<select name="model" id="model" class="form-select form-select-sm">
<optgroup label="Local (llama.cpp)">
<option value="llamacpp:GLM-4.6V-UD-Q4_K_XL-00001-of-00002">GLM-4.6V (Vision)</option>
</optgroup>
<optgroup label="Local (Ollama)">
<option value="ollama:llava">LLaVA (Vision)</option>
<option value="ollama:llava:13b">LLaVA 13B (Vision)</option>
</optgroup>
<optgroup label="OpenAI"> <optgroup label="OpenAI">
<option value="gpt-4o-mini" selected>GPT-4o Mini (Fast & Cheap)</option> <option value="gpt-4o-mini" selected>GPT-4o Mini (Fast & Cheap)</option>
<option value="gpt-4o">GPT-4o (Smarter)</option> <option value="gpt-4o">GPT-4o (Smarter)</option>


@@ -61,6 +61,8 @@ builder.Services.AddScoped<IPdfToImageConverter, PdfToImageConverter>();
// AI vision clients
builder.Services.AddHttpClient<OpenAIVisionClient>();
builder.Services.AddHttpClient<ClaudeVisionClient>();
builder.Services.AddHttpClient<OllamaVisionClient>();
builder.Services.AddHttpClient<LlamaCppVisionClient>();
builder.Services.AddScoped<IReceiptParser, AIReceiptParser>();
// AI categorization service
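
The commit registers both local clients as typed HttpClients but does not show how AIReceiptParser chooses between them. The model values submitted from the form carry "llamacpp:" and "ollama:" prefixes, which the clients strip themselves, so one plausible dispatch is a prefix check; the resolver below is a hypothetical sketch, not code from this commit:

// Hypothetical sketch: route a submitted model value to the matching client by
// prefix. The class name and selection logic are assumptions; only the
// registrations above are part of the commit.
public class VisionClientResolver
{
    private readonly LlamaCppVisionClient _llamaCpp;
    private readonly OllamaVisionClient _ollama;
    private readonly OpenAIVisionClient _openAI;

    public VisionClientResolver(LlamaCppVisionClient llamaCpp, OllamaVisionClient ollama, OpenAIVisionClient openAI)
    {
        _llamaCpp = llamaCpp;
        _ollama = ollama;
        _openAI = openAI;
    }

    public IAIVisionClient Resolve(string model)
    {
        if (model.StartsWith("llamacpp:")) return _llamaCpp;
        if (model.StartsWith("ollama:")) return _ollama;
        return _openAI; // existing cloud clients handle the non-prefixed model ids
    }
}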


@@ -216,4 +216,256 @@ namespace MoneyMap.Services
return trimmed;
}
}
/// <summary>
/// llama.cpp server client using OpenAI-compatible vision API for local LLM inference.
/// </summary>
public class LlamaCppVisionClient : IAIVisionClient
{
private readonly HttpClient _httpClient;
private readonly IConfiguration _configuration;
private readonly ILogger<LlamaCppVisionClient> _logger;
public LlamaCppVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<LlamaCppVisionClient> logger)
{
_httpClient = httpClient;
_httpClient.Timeout = TimeSpan.FromMinutes(5); // Local models can be slow
_configuration = configuration;
_logger = logger;
}
/// <summary>
/// Send a text-only prompt to the LLM (no image).
/// </summary>
public async Task<VisionApiResult> SendTextPromptAsync(string prompt, string? model = null)
{
var baseUrl = _configuration["LlamaCpp:BaseUrl"] ?? "http://athena.lan:11434";
var llamaModel = model ?? "GLM-4.6V-UD-Q4_K_XL-00001-of-00002";
if (llamaModel.StartsWith("llamacpp:"))
llamaModel = llamaModel[9..];
_logger.LogInformation("LlamaCpp: Sending text prompt to {BaseUrl} with model {Model}", baseUrl, llamaModel);
var requestBody = new
{
model = llamaModel,
messages = new[]
{
new
{
role = "user",
content = prompt
}
},
max_tokens = 1024,
temperature = 0.1
};
try
{
var json = JsonSerializer.Serialize(requestBody);
var content = new StringContent(json, Encoding.UTF8, "application/json");
var response = await _httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/v1/chat/completions", content);
if (!response.IsSuccessStatusCode)
{
var errorContent = await response.Content.ReadAsStringAsync();
_logger.LogError("llama.cpp API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
return VisionApiResult.Failure($"llama.cpp API error ({response.StatusCode}): {errorContent}");
}
var responseJson = await response.Content.ReadAsStringAsync();
var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
var messageContent = responseObj
.GetProperty("choices")[0]
.GetProperty("message")
.GetProperty("content")
.GetString();
_logger.LogInformation("LlamaCpp: Text prompt completed successfully");
return VisionApiResult.Success(CleanJsonResponse(messageContent));
}
catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
{
_logger.LogError("llama.cpp request timed out");
return VisionApiResult.Failure("llama.cpp request timed out.");
}
catch (Exception ex)
{
_logger.LogError(ex, "llama.cpp API call failed: {Message}", ex.Message);
return VisionApiResult.Failure($"llama.cpp API error: {ex.Message}");
}
}
public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
{
var baseUrl = _configuration["LlamaCpp:BaseUrl"] ?? "http://athena.lan:8080";
// Strip "llamacpp:" prefix if present
var llamaModel = model.StartsWith("llamacpp:") ? model[9..] : model;
_logger.LogInformation("LlamaCpp: Sending request to {BaseUrl} with model {Model}, image size: {Size} bytes",
baseUrl, llamaModel, base64Image.Length);
var requestBody = new
{
model = llamaModel,
messages = new[]
{
new
{
role = "user",
content = new object[]
{
new
{
type = "image_url",
image_url = new { url = $"data:{mediaType};base64,{base64Image}" }
},
new { type = "text", text = prompt }
}
}
},
max_tokens = 4096,
temperature = 0.1
};
try
{
var json = JsonSerializer.Serialize(requestBody);
_logger.LogDebug("LlamaCpp: Request payload size: {Size} bytes", json.Length);
var content = new StringContent(json, Encoding.UTF8, "application/json");
var response = await _httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/v1/chat/completions", content);
if (!response.IsSuccessStatusCode)
{
var errorContent = await response.Content.ReadAsStringAsync();
_logger.LogError("llama.cpp API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
return VisionApiResult.Failure($"llama.cpp API error ({response.StatusCode}): {errorContent}");
}
var responseJson = await response.Content.ReadAsStringAsync();
_logger.LogDebug("LlamaCpp: Response received, size: {Size} bytes", responseJson.Length);
var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
var messageContent = responseObj
.GetProperty("choices")[0]
.GetProperty("message")
.GetProperty("content")
.GetString();
_logger.LogInformation("LlamaCpp: Successfully parsed response");
return VisionApiResult.Success(CleanJsonResponse(messageContent));
}
catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
{
_logger.LogError("llama.cpp request timed out after 5 minutes");
return VisionApiResult.Failure("llama.cpp request timed out. The model may be too slow or not loaded.");
}
catch (Exception ex)
{
_logger.LogError(ex, "llama.cpp Vision API call failed: {Message}", ex.Message);
return VisionApiResult.Failure($"llama.cpp API error: {ex.Message}");
}
}
private static string CleanJsonResponse(string? content)
{
var trimmed = content?.Trim() ?? "";
if (trimmed.StartsWith("```json"))
{
trimmed = trimmed.Replace("```json", "").Replace("```", "").Trim();
}
return trimmed;
}
}
/// <summary>
/// Ollama Vision API client for local LLM inference.
/// </summary>
public class OllamaVisionClient : IAIVisionClient
{
private readonly HttpClient _httpClient;
private readonly IConfiguration _configuration;
private readonly ILogger<OllamaVisionClient> _logger;
public OllamaVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<OllamaVisionClient> logger)
{
_httpClient = httpClient;
_httpClient.Timeout = TimeSpan.FromMinutes(5); // Local models can be slow
_configuration = configuration;
_logger = logger;
}
public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
{
var baseUrl = _configuration["Ollama:BaseUrl"] ?? "http://athena.lan:11434";
// Strip "ollama:" prefix if present
var ollamaModel = model.StartsWith("ollama:") ? model[7..] : model;
_logger.LogInformation("Ollama: Sending request to {BaseUrl} with model {Model}, image size: {Size} bytes",
baseUrl, ollamaModel, base64Image.Length);
var requestBody = new
{
model = ollamaModel,
prompt = prompt,
images = new[] { base64Image },
stream = false,
options = new
{
temperature = 0.1
}
};
try
{
var json = JsonSerializer.Serialize(requestBody);
_logger.LogDebug("Ollama: Request payload size: {Size} bytes", json.Length);
var content = new StringContent(json, Encoding.UTF8, "application/json");
var response = await _httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/api/generate", content);
if (!response.IsSuccessStatusCode)
{
var errorContent = await response.Content.ReadAsStringAsync();
_logger.LogError("Ollama API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
return VisionApiResult.Failure($"Ollama API error ({response.StatusCode}): {errorContent}");
}
var responseJson = await response.Content.ReadAsStringAsync();
_logger.LogDebug("Ollama: Response received, size: {Size} bytes", responseJson.Length);
var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
var messageContent = responseObj.GetProperty("response").GetString();
_logger.LogInformation("Ollama: Successfully parsed response");
return VisionApiResult.Success(CleanJsonResponse(messageContent));
}
catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
{
_logger.LogError("Ollama request timed out after 5 minutes");
return VisionApiResult.Failure("Ollama request timed out. The model may be too slow or not loaded.");
}
catch (Exception ex)
{
_logger.LogError(ex, "Ollama Vision API call failed: {Message}", ex.Message);
return VisionApiResult.Failure($"Ollama API error: {ex.Message}");
}
}
private static string CleanJsonResponse(string? content)
{
var trimmed = content?.Trim() ?? "";
if (trimmed.StartsWith("```json"))
{
trimmed = trimmed.Replace("```json", "").Replace("```", "").Trim();
}
return trimmed;
}
}
}
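
The commit message also mentions text-only prompt support for LLM-assisted receipt matching; in this diff only LlamaCppVisionClient exposes SendTextPromptAsync. A hypothetical caller sketch, assuming the surrounding service and prompt wording (neither appears in this commit):

// Hypothetical usage sketch: ask the local model which candidate transaction a
// receipt belongs to via the new text-only path. The service name, prompt text,
// and handling of the result are assumptions, not part of this commit.
public class ReceiptMatchSketch
{
    private readonly LlamaCppVisionClient _client;

    public ReceiptMatchSketch(LlamaCppVisionClient client) => _client = client;

    public async Task<VisionApiResult> MatchAsync(string receiptSummary, IEnumerable<string> candidateTransactions)
    {
        var prompt =
            "Given this receipt:\n" + receiptSummary +
            "\n\nAnd these candidate transactions:\n" + string.Join("\n", candidateTransactions) +
            "\n\nReply with JSON like {\"bestMatchIndex\": 0} or {\"bestMatchIndex\": null}.";

        // The members of VisionApiResult are not shown in this diff, so the result
        // is returned as-is for the caller to inspect.
        return await _client.SendTextPromptAsync(prompt);
    }
}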