From 9f64c7784a78504dd5bacfde047e7e66fee6f6d3 Mon Sep 17 00:00:00 2001 From: AJ Isaacs Date: Mon, 24 Nov 2025 21:11:56 -0500 Subject: [PATCH] Refactor: Extract AI vision clients from AIReceiptParser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract for better separation of concerns: - Services/PdfToImageConverter.cs - PDF to image conversion using ImageMagick - Services/AIVisionClient.cs - OpenAI and Claude vision API clients - IAIVisionClient interface - OpenAIVisionClient, ClaudeVisionClient implementations AIReceiptParser now orchestrates using injected services. Adds proper logging for auto-mapping operations. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- MoneyMap/Services/AIReceiptParser.cs | 298 ++++------------------- MoneyMap/Services/AIVisionClient.cs | 219 +++++++++++++++++ MoneyMap/Services/PdfToImageConverter.cs | 61 +++++ 3 files changed, 329 insertions(+), 249 deletions(-) create mode 100644 MoneyMap/Services/AIVisionClient.cs create mode 100644 MoneyMap/Services/PdfToImageConverter.cs diff --git a/MoneyMap/Services/AIReceiptParser.cs b/MoneyMap/Services/AIReceiptParser.cs index f6fe840..3bfaa50 100644 --- a/MoneyMap/Services/AIReceiptParser.cs +++ b/MoneyMap/Services/AIReceiptParser.cs @@ -1,8 +1,6 @@ -using ImageMagick; using Microsoft.EntityFrameworkCore; using MoneyMap.Data; using MoneyMap.Models; -using System.Text; using System.Text.Json; namespace MoneyMap.Services @@ -16,26 +14,32 @@ namespace MoneyMap.Services { private readonly MoneyMapContext _db; private readonly IWebHostEnvironment _environment; - private readonly IConfiguration _configuration; - private readonly HttpClient _httpClient; + private readonly IPdfToImageConverter _pdfConverter; + private readonly OpenAIVisionClient _openAIClient; + private readonly ClaudeVisionClient _claudeClient; private readonly IMerchantService _merchantService; private readonly IServiceProvider _serviceProvider; + private readonly ILogger _logger; private string? _promptTemplate; public AIReceiptParser( MoneyMapContext db, IWebHostEnvironment environment, - IConfiguration configuration, - HttpClient httpClient, + IPdfToImageConverter pdfConverter, + OpenAIVisionClient openAIClient, + ClaudeVisionClient claudeClient, IMerchantService merchantService, - IServiceProvider serviceProvider) + IServiceProvider serviceProvider, + ILogger logger) { _db = db; _environment = environment; - _configuration = configuration; - _httpClient = httpClient; + _pdfConverter = pdfConverter; + _openAIClient = openAIClient; + _claudeClient = claudeClient; _merchantService = merchantService; _serviceProvider = serviceProvider; + _logger = logger; } public async Task ParseReceiptAsync(long receiptId, string? model = null) @@ -47,30 +51,10 @@ namespace MoneyMap.Services if (receipt == null) return ReceiptParseResult.Failure("Receipt not found."); - // Default to gpt-4o-mini if no model specified var selectedModel = model ?? "gpt-4o-mini"; - - // Determine provider based on model name var isClaude = selectedModel.StartsWith("claude-"); var provider = isClaude ? "Anthropic" : "OpenAI"; - // Get appropriate API key - string? apiKey; - if (isClaude) - { - apiKey = Environment.GetEnvironmentVariable("ANTHROPIC_API_KEY") - ?? _configuration["Anthropic:ApiKey"]; - if (string.IsNullOrWhiteSpace(apiKey)) - return ReceiptParseResult.Failure("Anthropic API key not configured. Set ANTHROPIC_API_KEY environment variable or Anthropic:ApiKey in appsettings.json"); - } - else - { - apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") - ?? _configuration["OpenAI:ApiKey"]; - if (string.IsNullOrWhiteSpace(apiKey)) - return ReceiptParseResult.Failure("OpenAI API key not configured. Set OPENAI_API_KEY environment variable or OpenAI:ApiKey in appsettings.json"); - } - var filePath = Path.Combine(_environment.WebRootPath, receipt.StoragePath.Replace("/", Path.DirectorySeparatorChar.ToString())); if (!File.Exists(filePath)) @@ -92,23 +76,45 @@ namespace MoneyMap.Services if (receipt.ContentType == "application/pdf") { - // Convert PDF to image using ImageMagick - base64Data = await ConvertPdfToBase64ImageAsync(filePath); + base64Data = await _pdfConverter.ConvertFirstPageToBase64Async(filePath); mediaType = "image/png"; } else { - // For images, use directly var fileBytes = await File.ReadAllBytesAsync(filePath); base64Data = Convert.ToBase64String(fileBytes); mediaType = receipt.ContentType; } - // Call Vision API with transaction name context + // Build prompt + var promptText = await LoadPromptTemplateAsync(); var transactionName = receipt.Transaction?.Name; - var parseData = isClaude - ? await CallClaudeVisionAsync(apiKey, base64Data, mediaType, selectedModel, transactionName) - : await CallOpenAIVisionAsync(apiKey, base64Data, mediaType, selectedModel, transactionName); + if (!string.IsNullOrWhiteSpace(transactionName)) + { + promptText += $"\n\nNote: This transaction was recorded as \"{transactionName}\" in the bank statement, which may help identify the merchant if the receipt is unclear."; + } + promptText += "\n\nRespond ONLY with valid JSON, no other text."; + + // Call appropriate vision API + var client = isClaude ? (IAIVisionClient)_claudeClient : _openAIClient; + var visionResult = await client.AnalyzeImageAsync(base64Data, mediaType, promptText, selectedModel); + + if (!visionResult.IsSuccess) + { + parseLog.Error = visionResult.ErrorMessage; + parseLog.CompletedAtUtc = DateTime.UtcNow; + _db.ReceiptParseLogs.Add(parseLog); + await _db.SaveChangesAsync(); + return ReceiptParseResult.Failure(visionResult.ErrorMessage!); + } + + // Parse the JSON response + var parseData = string.IsNullOrWhiteSpace(visionResult.Content) + ? new ParsedReceiptData() + : JsonSerializer.Deserialize(visionResult.Content, new JsonSerializerOptions + { + PropertyNameCaseInsensitive = true + }) ?? new ParsedReceiptData(); // Update receipt with parsed data receipt.Merchant = parseData.Merchant; @@ -118,7 +124,7 @@ namespace MoneyMap.Services receipt.ReceiptDate = parseData.ReceiptDate; receipt.DueDate = parseData.DueDate; - // Update transaction merchant if we extracted one and transaction doesn't have one yet + // Update transaction merchant if extracted and transaction doesn't have one if (receipt.Transaction != null && !string.IsNullOrWhiteSpace(parseData.Merchant) && receipt.Transaction.MerchantId == null) @@ -156,19 +162,19 @@ namespace MoneyMap.Services _db.ReceiptParseLogs.Add(parseLog); await _db.SaveChangesAsync(); - // Attempt auto-mapping after successful parse (only if receipt is not already mapped) + // Attempt auto-mapping after successful parse if (!receipt.TransactionId.HasValue) { try { - // Use service locator pattern to avoid circular dependency using var scope = _serviceProvider.CreateScope(); var autoMapper = scope.ServiceProvider.GetRequiredService(); await autoMapper.AutoMapReceiptAsync(receiptId); + _logger.LogInformation("Auto-mapping completed for receipt {ReceiptId}", receiptId); } - catch + catch (Exception ex) { - // Ignore auto-mapping errors - parsing was successful + _logger.LogWarning(ex, "Auto-mapping failed for receipt {ReceiptId}: {Message}", receiptId, ex.Message); } } @@ -181,44 +187,11 @@ namespace MoneyMap.Services _db.ReceiptParseLogs.Add(parseLog); await _db.SaveChangesAsync(); + _logger.LogError(ex, "Error parsing receipt {ReceiptId}: {Message}", receiptId, ex.Message); return ReceiptParseResult.Failure($"Error parsing receipt: {ex.Message}"); } } - private async Task ConvertPdfToBase64ImageAsync(string pdfPath) - { - return await Task.Run(() => - { - var pdfBytes = File.ReadAllBytes(pdfPath); - - // Render settings: 220 DPI for good quality - var settings = new MagickReadSettings - { - Density = new Density(220), - BackgroundColor = MagickColors.White, - ColorSpace = ColorSpace.sRGB - }; - - using var pages = new MagickImageCollection(); - pages.Read(pdfBytes, settings); - - // Use first page only - if (pages.Count == 0) - throw new Exception("PDF has no pages"); - - using var img = (MagickImage)pages[0].Clone(); - - // Ensure we have a clean 8-bit RGB canvas - img.ColorType = ColorType.TrueColor; - img.Alpha(AlphaOption.Remove); // flatten onto white - img.ResetPage(); - - // Convert to PNG bytes - var imageBytes = img.ToByteArray(MagickFormat.Png); - return Convert.ToBase64String(imageBytes); - }); - } - private async Task LoadPromptTemplateAsync() { if (_promptTemplate != null) @@ -232,179 +205,6 @@ namespace MoneyMap.Services _promptTemplate = await File.ReadAllTextAsync(promptPath); return _promptTemplate; } - - private async Task CallOpenAIVisionAsync(string apiKey, string base64Image, string mediaType, string model, string? transactionName = null) - { - // Load the prompt template from file - var promptText = await LoadPromptTemplateAsync(); - - // Add transaction context if available - if (!string.IsNullOrWhiteSpace(transactionName)) - { - promptText += $"\n\nNote: This transaction was recorded as \"{transactionName}\" in the bank statement, which may help identify the merchant if the receipt is unclear."; - } - - promptText += "\n\nRespond ONLY with valid JSON, no other text."; - - var requestBody = new - { - model = model, - messages = new[] - { - new - { - role = "user", - content = new object[] - { - new - { - type = "text", - text = promptText - }, - new - { - type = "image_url", - image_url = new - { - url = $"data:{mediaType};base64,{base64Image}" - } - } - } - } - }, - max_tokens = 2000, - temperature = 0.1 - }; - - _httpClient.DefaultRequestHeaders.Clear(); - _httpClient.DefaultRequestHeaders.Add("Authorization", $"Bearer {apiKey}"); - - var json = JsonSerializer.Serialize(requestBody); - var content = new StringContent(json, Encoding.UTF8, "application/json"); - - var response = await _httpClient.PostAsync("https://api.openai.com/v1/chat/completions", content); - - if (!response.IsSuccessStatusCode) - { - var errorContent = await response.Content.ReadAsStringAsync(); - throw new Exception($"OpenAI API error ({response.StatusCode}): {errorContent}"); - } - - var responseJson = await response.Content.ReadAsStringAsync(); - var responseObj = JsonSerializer.Deserialize(responseJson); - - var messageContent = responseObj - .GetProperty("choices")[0] - .GetProperty("message") - .GetProperty("content") - .GetString(); - - // Clean up the response - remove markdown code blocks if present - messageContent = messageContent?.Trim(); - if (messageContent?.StartsWith("```json") == true) - { - messageContent = messageContent.Replace("```json", "").Replace("```", "").Trim(); - } - - if (string.IsNullOrWhiteSpace(messageContent)) - { - return new ParsedReceiptData(); - } - - var parsedData = JsonSerializer.Deserialize(messageContent, new JsonSerializerOptions - { - PropertyNameCaseInsensitive = true - }); - - return parsedData ?? new ParsedReceiptData(); - } - - private async Task CallClaudeVisionAsync(string apiKey, string base64Image, string mediaType, string model, string? transactionName = null) - { - // Load the prompt template from file - var promptText = await LoadPromptTemplateAsync(); - - // Add transaction context if available - if (!string.IsNullOrWhiteSpace(transactionName)) - { - promptText += $"\n\nNote: This transaction was recorded as \"{transactionName}\" in the bank statement, which may help identify the merchant if the receipt is unclear."; - } - - promptText += "\n\nRespond ONLY with valid JSON, no other text."; - - var requestBody = new - { - model = model, - max_tokens = 2000, - messages = new[] - { - new - { - role = "user", - content = new object[] - { - new - { - type = "image", - source = new - { - type = "base64", - media_type = mediaType, - data = base64Image - } - }, - new - { - type = "text", - text = promptText - } - } - } - } - }; - - _httpClient.DefaultRequestHeaders.Clear(); - _httpClient.DefaultRequestHeaders.Add("x-api-key", apiKey); - _httpClient.DefaultRequestHeaders.Add("anthropic-version", "2023-06-01"); - - var json = JsonSerializer.Serialize(requestBody); - var content = new StringContent(json, Encoding.UTF8, "application/json"); - - var response = await _httpClient.PostAsync("https://api.anthropic.com/v1/messages", content); - - if (!response.IsSuccessStatusCode) - { - var errorContent = await response.Content.ReadAsStringAsync(); - throw new Exception($"Anthropic API error ({response.StatusCode}): {errorContent}"); - } - - var responseJson = await response.Content.ReadAsStringAsync(); - var responseObj = JsonSerializer.Deserialize(responseJson); - - var messageContent = responseObj - .GetProperty("content")[0] - .GetProperty("text") - .GetString(); - - // Clean up the response - remove markdown code blocks if present - messageContent = messageContent?.Trim(); - if (messageContent?.StartsWith("```json") == true) - { - messageContent = messageContent.Replace("```json", "").Replace("```", "").Trim(); - } - - if (string.IsNullOrWhiteSpace(messageContent)) - { - return new ParsedReceiptData(); - } - - var parsedData = JsonSerializer.Deserialize(messageContent, new JsonSerializerOptions - { - PropertyNameCaseInsensitive = true - }); - - return parsedData ?? new ParsedReceiptData(); - } } public class ParsedReceiptData @@ -440,4 +240,4 @@ namespace MoneyMap.Services public static ReceiptParseResult Failure(string message) => new() { IsSuccess = false, Message = message }; } -} \ No newline at end of file +} diff --git a/MoneyMap/Services/AIVisionClient.cs b/MoneyMap/Services/AIVisionClient.cs new file mode 100644 index 0000000..eb503c0 --- /dev/null +++ b/MoneyMap/Services/AIVisionClient.cs @@ -0,0 +1,219 @@ +using System.Text; +using System.Text.Json; + +namespace MoneyMap.Services +{ + /// + /// Result of an AI vision API call. + /// + public class VisionApiResult + { + public bool IsSuccess { get; init; } + public string? Content { get; init; } + public string? ErrorMessage { get; init; } + + public static VisionApiResult Success(string content) => + new() { IsSuccess = true, Content = content }; + + public static VisionApiResult Failure(string error) => + new() { IsSuccess = false, ErrorMessage = error }; + } + + /// + /// Client for making vision API calls to AI providers. + /// + public interface IAIVisionClient + { + Task AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model); + } + + /// + /// OpenAI Vision API client. + /// + public class OpenAIVisionClient : IAIVisionClient + { + private readonly HttpClient _httpClient; + private readonly IConfiguration _configuration; + private readonly ILogger _logger; + + public OpenAIVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger logger) + { + _httpClient = httpClient; + _configuration = configuration; + _logger = logger; + } + + public async Task AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model) + { + var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") + ?? _configuration["OpenAI:ApiKey"]; + + if (string.IsNullOrWhiteSpace(apiKey)) + return VisionApiResult.Failure("OpenAI API key not configured. Set OPENAI_API_KEY environment variable or OpenAI:ApiKey in appsettings.json"); + + var requestBody = new + { + model = model, + messages = new[] + { + new + { + role = "user", + content = new object[] + { + new { type = "text", text = prompt }, + new + { + type = "image_url", + image_url = new { url = $"data:{mediaType};base64,{base64Image}" } + } + } + } + }, + max_tokens = 2000, + temperature = 0.1 + }; + + try + { + _httpClient.DefaultRequestHeaders.Clear(); + _httpClient.DefaultRequestHeaders.Add("Authorization", $"Bearer {apiKey}"); + + var json = JsonSerializer.Serialize(requestBody); + var content = new StringContent(json, Encoding.UTF8, "application/json"); + + var response = await _httpClient.PostAsync("https://api.openai.com/v1/chat/completions", content); + + if (!response.IsSuccessStatusCode) + { + var errorContent = await response.Content.ReadAsStringAsync(); + _logger.LogError("OpenAI API error ({StatusCode}): {Error}", response.StatusCode, errorContent); + return VisionApiResult.Failure($"OpenAI API error ({response.StatusCode}): {errorContent}"); + } + + var responseJson = await response.Content.ReadAsStringAsync(); + var responseObj = JsonSerializer.Deserialize(responseJson); + + var messageContent = responseObj + .GetProperty("choices")[0] + .GetProperty("message") + .GetProperty("content") + .GetString(); + + return VisionApiResult.Success(CleanJsonResponse(messageContent)); + } + catch (Exception ex) + { + _logger.LogError(ex, "OpenAI Vision API call failed: {Message}", ex.Message); + return VisionApiResult.Failure($"OpenAI API error: {ex.Message}"); + } + } + + private static string CleanJsonResponse(string? content) + { + var trimmed = content?.Trim() ?? ""; + if (trimmed.StartsWith("```json")) + { + trimmed = trimmed.Replace("```json", "").Replace("```", "").Trim(); + } + return trimmed; + } + } + + /// + /// Anthropic Claude Vision API client. + /// + public class ClaudeVisionClient : IAIVisionClient + { + private readonly HttpClient _httpClient; + private readonly IConfiguration _configuration; + private readonly ILogger _logger; + + public ClaudeVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger logger) + { + _httpClient = httpClient; + _configuration = configuration; + _logger = logger; + } + + public async Task AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model) + { + var apiKey = Environment.GetEnvironmentVariable("ANTHROPIC_API_KEY") + ?? _configuration["Anthropic:ApiKey"]; + + if (string.IsNullOrWhiteSpace(apiKey)) + return VisionApiResult.Failure("Anthropic API key not configured. Set ANTHROPIC_API_KEY environment variable or Anthropic:ApiKey in appsettings.json"); + + var requestBody = new + { + model = model, + max_tokens = 2000, + messages = new[] + { + new + { + role = "user", + content = new object[] + { + new + { + type = "image", + source = new + { + type = "base64", + media_type = mediaType, + data = base64Image + } + }, + new { type = "text", text = prompt } + } + } + } + }; + + try + { + _httpClient.DefaultRequestHeaders.Clear(); + _httpClient.DefaultRequestHeaders.Add("x-api-key", apiKey); + _httpClient.DefaultRequestHeaders.Add("anthropic-version", "2023-06-01"); + + var json = JsonSerializer.Serialize(requestBody); + var content = new StringContent(json, Encoding.UTF8, "application/json"); + + var response = await _httpClient.PostAsync("https://api.anthropic.com/v1/messages", content); + + if (!response.IsSuccessStatusCode) + { + var errorContent = await response.Content.ReadAsStringAsync(); + _logger.LogError("Anthropic API error ({StatusCode}): {Error}", response.StatusCode, errorContent); + return VisionApiResult.Failure($"Anthropic API error ({response.StatusCode}): {errorContent}"); + } + + var responseJson = await response.Content.ReadAsStringAsync(); + var responseObj = JsonSerializer.Deserialize(responseJson); + + var messageContent = responseObj + .GetProperty("content")[0] + .GetProperty("text") + .GetString(); + + return VisionApiResult.Success(CleanJsonResponse(messageContent)); + } + catch (Exception ex) + { + _logger.LogError(ex, "Claude Vision API call failed: {Message}", ex.Message); + return VisionApiResult.Failure($"Anthropic API error: {ex.Message}"); + } + } + + private static string CleanJsonResponse(string? content) + { + var trimmed = content?.Trim() ?? ""; + if (trimmed.StartsWith("```json")) + { + trimmed = trimmed.Replace("```json", "").Replace("```", "").Trim(); + } + return trimmed; + } + } +} diff --git a/MoneyMap/Services/PdfToImageConverter.cs b/MoneyMap/Services/PdfToImageConverter.cs new file mode 100644 index 0000000..0aaeaa2 --- /dev/null +++ b/MoneyMap/Services/PdfToImageConverter.cs @@ -0,0 +1,61 @@ +using ImageMagick; + +namespace MoneyMap.Services +{ + /// + /// Service for converting PDF files to images for AI processing. + /// + public interface IPdfToImageConverter + { + /// + /// Converts the first page of a PDF to a base64-encoded PNG image. + /// + Task ConvertFirstPageToBase64Async(string pdfPath); + + /// + /// Converts PDF bytes to a base64-encoded PNG image. + /// + Task ConvertFirstPageToBase64Async(byte[] pdfBytes); + } + + public class PdfToImageConverter : IPdfToImageConverter + { + private const int DefaultDpi = 220; + + public Task ConvertFirstPageToBase64Async(string pdfPath) + { + var pdfBytes = File.ReadAllBytes(pdfPath); + return ConvertFirstPageToBase64Async(pdfBytes); + } + + public Task ConvertFirstPageToBase64Async(byte[] pdfBytes) + { + return Task.Run(() => + { + var settings = new MagickReadSettings + { + Density = new Density(DefaultDpi), + BackgroundColor = MagickColors.White, + ColorSpace = ColorSpace.sRGB + }; + + using var pages = new MagickImageCollection(); + pages.Read(pdfBytes, settings); + + if (pages.Count == 0) + throw new InvalidOperationException("PDF has no pages"); + + using var img = (MagickImage)pages[0].Clone(); + + // Ensure we have a clean 8-bit RGB canvas + img.ColorType = ColorType.TrueColor; + img.Alpha(AlphaOption.Remove); // flatten onto white + img.ResetPage(); + + // Convert to PNG bytes + var imageBytes = img.ToByteArray(MagickFormat.Png); + return Convert.ToBase64String(imageBytes); + }); + } + } +}