Refactor: Extract AI vision clients from AIReceiptParser

Extract the following services for better separation of concerns:
- Services/PdfToImageConverter.cs - PDF to image conversion using ImageMagick
- Services/AIVisionClient.cs - OpenAI and Claude vision API clients
  - IAIVisionClient interface
  - OpenAIVisionClient, ClaudeVisionClient implementations

AIReceiptParser now orchestrates parsing through these injected services.
Also adds proper logging for auto-mapping operations.
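
A minimal sketch of the DI registration this layout assumes (illustrative only, not part of this commit; actual lifetimes and wiring in Program.cs may differ):

// Hypothetical Program.cs registration (assumption, not included in this diff)
builder.Services.AddSingleton<IPdfToImageConverter, PdfToImageConverter>();
builder.Services.AddHttpClient<OpenAIVisionClient>();   // typed HttpClient for the OpenAI client
builder.Services.AddHttpClient<ClaudeVisionClient>();   // typed HttpClient for the Claude client
// AIReceiptParser can then be registered normally, e.g.:
// builder.Services.AddScoped<AIReceiptParser>();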

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-24 21:11:56 -05:00
parent ea7b2c2a3c
commit 9f64c7784a
3 changed files with 329 additions and 249 deletions

Services/AIReceiptParser.cs

@@ -1,8 +1,6 @@
using ImageMagick;
using Microsoft.EntityFrameworkCore;
using MoneyMap.Data;
using MoneyMap.Models;
using System.Text;
using System.Text.Json;
namespace MoneyMap.Services
@@ -16,26 +14,32 @@ namespace MoneyMap.Services
{
private readonly MoneyMapContext _db;
private readonly IWebHostEnvironment _environment;
private readonly IConfiguration _configuration;
private readonly HttpClient _httpClient;
private readonly IPdfToImageConverter _pdfConverter;
private readonly OpenAIVisionClient _openAIClient;
private readonly ClaudeVisionClient _claudeClient;
private readonly IMerchantService _merchantService;
private readonly IServiceProvider _serviceProvider;
private readonly ILogger<AIReceiptParser> _logger;
private string? _promptTemplate;
public AIReceiptParser(
MoneyMapContext db,
IWebHostEnvironment environment,
IConfiguration configuration,
HttpClient httpClient,
IPdfToImageConverter pdfConverter,
OpenAIVisionClient openAIClient,
ClaudeVisionClient claudeClient,
IMerchantService merchantService,
IServiceProvider serviceProvider)
IServiceProvider serviceProvider,
ILogger<AIReceiptParser> logger)
{
_db = db;
_environment = environment;
_configuration = configuration;
_httpClient = httpClient;
_pdfConverter = pdfConverter;
_openAIClient = openAIClient;
_claudeClient = claudeClient;
_merchantService = merchantService;
_serviceProvider = serviceProvider;
_logger = logger;
}
public async Task<ReceiptParseResult> ParseReceiptAsync(long receiptId, string? model = null)
@@ -47,30 +51,10 @@ namespace MoneyMap.Services
if (receipt == null)
return ReceiptParseResult.Failure("Receipt not found.");
// Default to gpt-4o-mini if no model specified
var selectedModel = model ?? "gpt-4o-mini";
// Determine provider based on model name
var isClaude = selectedModel.StartsWith("claude-");
var provider = isClaude ? "Anthropic" : "OpenAI";
// Get appropriate API key
string? apiKey;
if (isClaude)
{
apiKey = Environment.GetEnvironmentVariable("ANTHROPIC_API_KEY")
?? _configuration["Anthropic:ApiKey"];
if (string.IsNullOrWhiteSpace(apiKey))
return ReceiptParseResult.Failure("Anthropic API key not configured. Set ANTHROPIC_API_KEY environment variable or Anthropic:ApiKey in appsettings.json");
}
else
{
apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY")
?? _configuration["OpenAI:ApiKey"];
if (string.IsNullOrWhiteSpace(apiKey))
return ReceiptParseResult.Failure("OpenAI API key not configured. Set OPENAI_API_KEY environment variable or OpenAI:ApiKey in appsettings.json");
}
var filePath = Path.Combine(_environment.WebRootPath, receipt.StoragePath.Replace("/", Path.DirectorySeparatorChar.ToString()));
if (!File.Exists(filePath))
@@ -92,23 +76,45 @@ namespace MoneyMap.Services
if (receipt.ContentType == "application/pdf")
{
// Convert PDF to image using ImageMagick
base64Data = await ConvertPdfToBase64ImageAsync(filePath);
base64Data = await _pdfConverter.ConvertFirstPageToBase64Async(filePath);
mediaType = "image/png";
}
else
{
// For images, use directly
var fileBytes = await File.ReadAllBytesAsync(filePath);
base64Data = Convert.ToBase64String(fileBytes);
mediaType = receipt.ContentType;
}
// Call Vision API with transaction name context
// Build prompt
var promptText = await LoadPromptTemplateAsync();
var transactionName = receipt.Transaction?.Name;
var parseData = isClaude
? await CallClaudeVisionAsync(apiKey, base64Data, mediaType, selectedModel, transactionName)
: await CallOpenAIVisionAsync(apiKey, base64Data, mediaType, selectedModel, transactionName);
if (!string.IsNullOrWhiteSpace(transactionName))
{
promptText += $"\n\nNote: This transaction was recorded as \"{transactionName}\" in the bank statement, which may help identify the merchant if the receipt is unclear.";
}
promptText += "\n\nRespond ONLY with valid JSON, no other text.";
// Call appropriate vision API
var client = isClaude ? (IAIVisionClient)_claudeClient : _openAIClient;
var visionResult = await client.AnalyzeImageAsync(base64Data, mediaType, promptText, selectedModel);
if (!visionResult.IsSuccess)
{
parseLog.Error = visionResult.ErrorMessage;
parseLog.CompletedAtUtc = DateTime.UtcNow;
_db.ReceiptParseLogs.Add(parseLog);
await _db.SaveChangesAsync();
return ReceiptParseResult.Failure(visionResult.ErrorMessage!);
}
// Parse the JSON response
var parseData = string.IsNullOrWhiteSpace(visionResult.Content)
? new ParsedReceiptData()
: JsonSerializer.Deserialize<ParsedReceiptData>(visionResult.Content, new JsonSerializerOptions
{
PropertyNameCaseInsensitive = true
}) ?? new ParsedReceiptData();
// Update receipt with parsed data
receipt.Merchant = parseData.Merchant;
@@ -118,7 +124,7 @@ namespace MoneyMap.Services
receipt.ReceiptDate = parseData.ReceiptDate;
receipt.DueDate = parseData.DueDate;
// Update transaction merchant if we extracted one and transaction doesn't have one yet
// Update transaction merchant if extracted and transaction doesn't have one
if (receipt.Transaction != null &&
!string.IsNullOrWhiteSpace(parseData.Merchant) &&
receipt.Transaction.MerchantId == null)
@@ -156,19 +162,19 @@ namespace MoneyMap.Services
_db.ReceiptParseLogs.Add(parseLog);
await _db.SaveChangesAsync();
// Attempt auto-mapping after successful parse (only if receipt is not already mapped)
// Attempt auto-mapping after successful parse
if (!receipt.TransactionId.HasValue)
{
try
{
// Use service locator pattern to avoid circular dependency
using var scope = _serviceProvider.CreateScope();
var autoMapper = scope.ServiceProvider.GetRequiredService<IReceiptAutoMapper>();
await autoMapper.AutoMapReceiptAsync(receiptId);
_logger.LogInformation("Auto-mapping completed for receipt {ReceiptId}", receiptId);
}
catch
catch (Exception ex)
{
// Ignore auto-mapping errors - parsing was successful
_logger.LogWarning(ex, "Auto-mapping failed for receipt {ReceiptId}: {Message}", receiptId, ex.Message);
}
}
@@ -181,44 +187,11 @@ namespace MoneyMap.Services
_db.ReceiptParseLogs.Add(parseLog);
await _db.SaveChangesAsync();
_logger.LogError(ex, "Error parsing receipt {ReceiptId}: {Message}", receiptId, ex.Message);
return ReceiptParseResult.Failure($"Error parsing receipt: {ex.Message}");
}
}
private async Task<string> ConvertPdfToBase64ImageAsync(string pdfPath)
{
return await Task.Run(() =>
{
var pdfBytes = File.ReadAllBytes(pdfPath);
// Render settings: 220 DPI for good quality
var settings = new MagickReadSettings
{
Density = new Density(220),
BackgroundColor = MagickColors.White,
ColorSpace = ColorSpace.sRGB
};
using var pages = new MagickImageCollection();
pages.Read(pdfBytes, settings);
// Use first page only
if (pages.Count == 0)
throw new Exception("PDF has no pages");
using var img = (MagickImage)pages[0].Clone();
// Ensure we have a clean 8-bit RGB canvas
img.ColorType = ColorType.TrueColor;
img.Alpha(AlphaOption.Remove); // flatten onto white
img.ResetPage();
// Convert to PNG bytes
var imageBytes = img.ToByteArray(MagickFormat.Png);
return Convert.ToBase64String(imageBytes);
});
}
private async Task<string> LoadPromptTemplateAsync()
{
if (_promptTemplate != null)
@@ -232,179 +205,6 @@ namespace MoneyMap.Services
_promptTemplate = await File.ReadAllTextAsync(promptPath);
return _promptTemplate;
}
private async Task<ParsedReceiptData> CallOpenAIVisionAsync(string apiKey, string base64Image, string mediaType, string model, string? transactionName = null)
{
// Load the prompt template from file
var promptText = await LoadPromptTemplateAsync();
// Add transaction context if available
if (!string.IsNullOrWhiteSpace(transactionName))
{
promptText += $"\n\nNote: This transaction was recorded as \"{transactionName}\" in the bank statement, which may help identify the merchant if the receipt is unclear.";
}
promptText += "\n\nRespond ONLY with valid JSON, no other text.";
var requestBody = new
{
model = model,
messages = new[]
{
new
{
role = "user",
content = new object[]
{
new
{
type = "text",
text = promptText
},
new
{
type = "image_url",
image_url = new
{
url = $"data:{mediaType};base64,{base64Image}"
}
}
}
}
},
max_tokens = 2000,
temperature = 0.1
};
_httpClient.DefaultRequestHeaders.Clear();
_httpClient.DefaultRequestHeaders.Add("Authorization", $"Bearer {apiKey}");
var json = JsonSerializer.Serialize(requestBody);
var content = new StringContent(json, Encoding.UTF8, "application/json");
var response = await _httpClient.PostAsync("https://api.openai.com/v1/chat/completions", content);
if (!response.IsSuccessStatusCode)
{
var errorContent = await response.Content.ReadAsStringAsync();
throw new Exception($"OpenAI API error ({response.StatusCode}): {errorContent}");
}
var responseJson = await response.Content.ReadAsStringAsync();
var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
var messageContent = responseObj
.GetProperty("choices")[0]
.GetProperty("message")
.GetProperty("content")
.GetString();
// Clean up the response - remove markdown code blocks if present
messageContent = messageContent?.Trim();
if (messageContent?.StartsWith("```json") == true)
{
messageContent = messageContent.Replace("```json", "").Replace("```", "").Trim();
}
if (string.IsNullOrWhiteSpace(messageContent))
{
return new ParsedReceiptData();
}
var parsedData = JsonSerializer.Deserialize<ParsedReceiptData>(messageContent, new JsonSerializerOptions
{
PropertyNameCaseInsensitive = true
});
return parsedData ?? new ParsedReceiptData();
}
private async Task<ParsedReceiptData> CallClaudeVisionAsync(string apiKey, string base64Image, string mediaType, string model, string? transactionName = null)
{
// Load the prompt template from file
var promptText = await LoadPromptTemplateAsync();
// Add transaction context if available
if (!string.IsNullOrWhiteSpace(transactionName))
{
promptText += $"\n\nNote: This transaction was recorded as \"{transactionName}\" in the bank statement, which may help identify the merchant if the receipt is unclear.";
}
promptText += "\n\nRespond ONLY with valid JSON, no other text.";
var requestBody = new
{
model = model,
max_tokens = 2000,
messages = new[]
{
new
{
role = "user",
content = new object[]
{
new
{
type = "image",
source = new
{
type = "base64",
media_type = mediaType,
data = base64Image
}
},
new
{
type = "text",
text = promptText
}
}
}
}
};
_httpClient.DefaultRequestHeaders.Clear();
_httpClient.DefaultRequestHeaders.Add("x-api-key", apiKey);
_httpClient.DefaultRequestHeaders.Add("anthropic-version", "2023-06-01");
var json = JsonSerializer.Serialize(requestBody);
var content = new StringContent(json, Encoding.UTF8, "application/json");
var response = await _httpClient.PostAsync("https://api.anthropic.com/v1/messages", content);
if (!response.IsSuccessStatusCode)
{
var errorContent = await response.Content.ReadAsStringAsync();
throw new Exception($"Anthropic API error ({response.StatusCode}): {errorContent}");
}
var responseJson = await response.Content.ReadAsStringAsync();
var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
var messageContent = responseObj
.GetProperty("content")[0]
.GetProperty("text")
.GetString();
// Clean up the response - remove markdown code blocks if present
messageContent = messageContent?.Trim();
if (messageContent?.StartsWith("```json") == true)
{
messageContent = messageContent.Replace("```json", "").Replace("```", "").Trim();
}
if (string.IsNullOrWhiteSpace(messageContent))
{
return new ParsedReceiptData();
}
var parsedData = JsonSerializer.Deserialize<ParsedReceiptData>(messageContent, new JsonSerializerOptions
{
PropertyNameCaseInsensitive = true
});
return parsedData ?? new ParsedReceiptData();
}
}
public class ParsedReceiptData
@@ -440,4 +240,4 @@ namespace MoneyMap.Services
public static ReceiptParseResult Failure(string message) =>
new() { IsSuccess = false, Message = message };
}
}
}

Services/AIVisionClient.cs

@@ -0,0 +1,219 @@
using System.Text;
using System.Text.Json;
namespace MoneyMap.Services
{
/// <summary>
/// Result of an AI vision API call.
/// </summary>
public class VisionApiResult
{
public bool IsSuccess { get; init; }
public string? Content { get; init; }
public string? ErrorMessage { get; init; }
public static VisionApiResult Success(string content) =>
new() { IsSuccess = true, Content = content };
public static VisionApiResult Failure(string error) =>
new() { IsSuccess = false, ErrorMessage = error };
}
/// <summary>
/// Client for making vision API calls to AI providers.
/// </summary>
public interface IAIVisionClient
{
Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model);
}
/// <summary>
/// OpenAI Vision API client.
/// </summary>
public class OpenAIVisionClient : IAIVisionClient
{
private readonly HttpClient _httpClient;
private readonly IConfiguration _configuration;
private readonly ILogger<OpenAIVisionClient> _logger;
public OpenAIVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<OpenAIVisionClient> logger)
{
_httpClient = httpClient;
_configuration = configuration;
_logger = logger;
}
public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
{
var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY")
?? _configuration["OpenAI:ApiKey"];
if (string.IsNullOrWhiteSpace(apiKey))
return VisionApiResult.Failure("OpenAI API key not configured. Set OPENAI_API_KEY environment variable or OpenAI:ApiKey in appsettings.json");
var requestBody = new
{
model = model,
messages = new[]
{
new
{
role = "user",
content = new object[]
{
new { type = "text", text = prompt },
new
{
type = "image_url",
image_url = new { url = $"data:{mediaType};base64,{base64Image}" }
}
}
}
},
max_tokens = 2000,
temperature = 0.1
};
try
{
_httpClient.DefaultRequestHeaders.Clear();
_httpClient.DefaultRequestHeaders.Add("Authorization", $"Bearer {apiKey}");
var json = JsonSerializer.Serialize(requestBody);
var content = new StringContent(json, Encoding.UTF8, "application/json");
var response = await _httpClient.PostAsync("https://api.openai.com/v1/chat/completions", content);
if (!response.IsSuccessStatusCode)
{
var errorContent = await response.Content.ReadAsStringAsync();
_logger.LogError("OpenAI API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
return VisionApiResult.Failure($"OpenAI API error ({response.StatusCode}): {errorContent}");
}
var responseJson = await response.Content.ReadAsStringAsync();
var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
var messageContent = responseObj
.GetProperty("choices")[0]
.GetProperty("message")
.GetProperty("content")
.GetString();
return VisionApiResult.Success(CleanJsonResponse(messageContent));
}
catch (Exception ex)
{
_logger.LogError(ex, "OpenAI Vision API call failed: {Message}", ex.Message);
return VisionApiResult.Failure($"OpenAI API error: {ex.Message}");
}
}
private static string CleanJsonResponse(string? content)
{
var trimmed = content?.Trim() ?? "";
if (trimmed.StartsWith("```json"))
{
trimmed = trimmed.Replace("```json", "").Replace("```", "").Trim();
}
return trimmed;
}
}
/// <summary>
/// Anthropic Claude Vision API client.
/// </summary>
public class ClaudeVisionClient : IAIVisionClient
{
private readonly HttpClient _httpClient;
private readonly IConfiguration _configuration;
private readonly ILogger<ClaudeVisionClient> _logger;
public ClaudeVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<ClaudeVisionClient> logger)
{
_httpClient = httpClient;
_configuration = configuration;
_logger = logger;
}
public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
{
var apiKey = Environment.GetEnvironmentVariable("ANTHROPIC_API_KEY")
?? _configuration["Anthropic:ApiKey"];
if (string.IsNullOrWhiteSpace(apiKey))
return VisionApiResult.Failure("Anthropic API key not configured. Set ANTHROPIC_API_KEY environment variable or Anthropic:ApiKey in appsettings.json");
var requestBody = new
{
model = model,
max_tokens = 2000,
messages = new[]
{
new
{
role = "user",
content = new object[]
{
new
{
type = "image",
source = new
{
type = "base64",
media_type = mediaType,
data = base64Image
}
},
new { type = "text", text = prompt }
}
}
}
};
try
{
_httpClient.DefaultRequestHeaders.Clear();
_httpClient.DefaultRequestHeaders.Add("x-api-key", apiKey);
_httpClient.DefaultRequestHeaders.Add("anthropic-version", "2023-06-01");
var json = JsonSerializer.Serialize(requestBody);
var content = new StringContent(json, Encoding.UTF8, "application/json");
var response = await _httpClient.PostAsync("https://api.anthropic.com/v1/messages", content);
if (!response.IsSuccessStatusCode)
{
var errorContent = await response.Content.ReadAsStringAsync();
_logger.LogError("Anthropic API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
return VisionApiResult.Failure($"Anthropic API error ({response.StatusCode}): {errorContent}");
}
var responseJson = await response.Content.ReadAsStringAsync();
var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
var messageContent = responseObj
.GetProperty("content")[0]
.GetProperty("text")
.GetString();
return VisionApiResult.Success(CleanJsonResponse(messageContent));
}
catch (Exception ex)
{
_logger.LogError(ex, "Claude Vision API call failed: {Message}", ex.Message);
return VisionApiResult.Failure($"Anthropic API error: {ex.Message}");
}
}
private static string CleanJsonResponse(string? content)
{
var trimmed = content?.Trim() ?? "";
if (trimmed.StartsWith("```json"))
{
trimmed = trimmed.Replace("```json", "").Replace("```", "").Trim();
}
return trimmed;
}
}
}

Services/PdfToImageConverter.cs

@@ -0,0 +1,61 @@
using ImageMagick;
namespace MoneyMap.Services
{
/// <summary>
/// Service for converting PDF files to images for AI processing.
/// </summary>
public interface IPdfToImageConverter
{
/// <summary>
/// Converts the first page of a PDF to a base64-encoded PNG image.
/// </summary>
Task<string> ConvertFirstPageToBase64Async(string pdfPath);
/// <summary>
/// Converts PDF bytes to a base64-encoded PNG image.
/// </summary>
Task<string> ConvertFirstPageToBase64Async(byte[] pdfBytes);
}
public class PdfToImageConverter : IPdfToImageConverter
{
private const int DefaultDpi = 220;
public Task<string> ConvertFirstPageToBase64Async(string pdfPath)
{
var pdfBytes = File.ReadAllBytes(pdfPath);
return ConvertFirstPageToBase64Async(pdfBytes);
}
public Task<string> ConvertFirstPageToBase64Async(byte[] pdfBytes)
{
return Task.Run(() =>
{
var settings = new MagickReadSettings
{
Density = new Density(DefaultDpi),
BackgroundColor = MagickColors.White,
ColorSpace = ColorSpace.sRGB
};
using var pages = new MagickImageCollection();
pages.Read(pdfBytes, settings);
if (pages.Count == 0)
throw new InvalidOperationException("PDF has no pages");
using var img = (MagickImage)pages[0].Clone();
// Ensure we have a clean 8-bit RGB canvas
img.ColorType = ColorType.TrueColor;
img.Alpha(AlphaOption.Remove); // flatten onto white
img.ResetPage();
// Convert to PNG bytes
var imageBytes = img.ToByteArray(MagickFormat.Png);
return Convert.ToBase64String(imageBytes);
});
}
}
}