Refactor: Extract AI vision clients from AIReceiptParser

Extract for better separation of concerns:
- Services/PdfToImageConverter.cs - PDF to image conversion using ImageMagick
- Services/AIVisionClient.cs - OpenAI and Claude vision API clients
  - IAIVisionClient interface
  - OpenAIVisionClient, ClaudeVisionClient implementations

AIReceiptParser now orchestrates using injected services.
Adds proper logging for auto-mapping operations.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-11-24 21:11:56 -05:00
parent ea7b2c2a3c
commit 9f64c7784a
3 changed files with 329 additions and 249 deletions

View File

@@ -1,8 +1,6 @@
using ImageMagick;
using Microsoft.EntityFrameworkCore; using Microsoft.EntityFrameworkCore;
using MoneyMap.Data; using MoneyMap.Data;
using MoneyMap.Models; using MoneyMap.Models;
using System.Text;
using System.Text.Json; using System.Text.Json;
namespace MoneyMap.Services namespace MoneyMap.Services
@@ -16,26 +14,32 @@ namespace MoneyMap.Services
{ {
private readonly MoneyMapContext _db; private readonly MoneyMapContext _db;
private readonly IWebHostEnvironment _environment; private readonly IWebHostEnvironment _environment;
private readonly IConfiguration _configuration; private readonly IPdfToImageConverter _pdfConverter;
private readonly HttpClient _httpClient; private readonly OpenAIVisionClient _openAIClient;
private readonly ClaudeVisionClient _claudeClient;
private readonly IMerchantService _merchantService; private readonly IMerchantService _merchantService;
private readonly IServiceProvider _serviceProvider; private readonly IServiceProvider _serviceProvider;
private readonly ILogger<AIReceiptParser> _logger;
private string? _promptTemplate; private string? _promptTemplate;
public AIReceiptParser( public AIReceiptParser(
MoneyMapContext db, MoneyMapContext db,
IWebHostEnvironment environment, IWebHostEnvironment environment,
IConfiguration configuration, IPdfToImageConverter pdfConverter,
HttpClient httpClient, OpenAIVisionClient openAIClient,
ClaudeVisionClient claudeClient,
IMerchantService merchantService, IMerchantService merchantService,
IServiceProvider serviceProvider) IServiceProvider serviceProvider,
ILogger<AIReceiptParser> logger)
{ {
_db = db; _db = db;
_environment = environment; _environment = environment;
_configuration = configuration; _pdfConverter = pdfConverter;
_httpClient = httpClient; _openAIClient = openAIClient;
_claudeClient = claudeClient;
_merchantService = merchantService; _merchantService = merchantService;
_serviceProvider = serviceProvider; _serviceProvider = serviceProvider;
_logger = logger;
} }
public async Task<ReceiptParseResult> ParseReceiptAsync(long receiptId, string? model = null) public async Task<ReceiptParseResult> ParseReceiptAsync(long receiptId, string? model = null)
@@ -47,30 +51,10 @@ namespace MoneyMap.Services
if (receipt == null) if (receipt == null)
return ReceiptParseResult.Failure("Receipt not found."); return ReceiptParseResult.Failure("Receipt not found.");
// Default to gpt-4o-mini if no model specified
var selectedModel = model ?? "gpt-4o-mini"; var selectedModel = model ?? "gpt-4o-mini";
// Determine provider based on model name
var isClaude = selectedModel.StartsWith("claude-"); var isClaude = selectedModel.StartsWith("claude-");
var provider = isClaude ? "Anthropic" : "OpenAI"; var provider = isClaude ? "Anthropic" : "OpenAI";
// Get appropriate API key
string? apiKey;
if (isClaude)
{
apiKey = Environment.GetEnvironmentVariable("ANTHROPIC_API_KEY")
?? _configuration["Anthropic:ApiKey"];
if (string.IsNullOrWhiteSpace(apiKey))
return ReceiptParseResult.Failure("Anthropic API key not configured. Set ANTHROPIC_API_KEY environment variable or Anthropic:ApiKey in appsettings.json");
}
else
{
apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY")
?? _configuration["OpenAI:ApiKey"];
if (string.IsNullOrWhiteSpace(apiKey))
return ReceiptParseResult.Failure("OpenAI API key not configured. Set OPENAI_API_KEY environment variable or OpenAI:ApiKey in appsettings.json");
}
var filePath = Path.Combine(_environment.WebRootPath, receipt.StoragePath.Replace("/", Path.DirectorySeparatorChar.ToString())); var filePath = Path.Combine(_environment.WebRootPath, receipt.StoragePath.Replace("/", Path.DirectorySeparatorChar.ToString()));
if (!File.Exists(filePath)) if (!File.Exists(filePath))
@@ -92,23 +76,45 @@ namespace MoneyMap.Services
if (receipt.ContentType == "application/pdf") if (receipt.ContentType == "application/pdf")
{ {
// Convert PDF to image using ImageMagick base64Data = await _pdfConverter.ConvertFirstPageToBase64Async(filePath);
base64Data = await ConvertPdfToBase64ImageAsync(filePath);
mediaType = "image/png"; mediaType = "image/png";
} }
else else
{ {
// For images, use directly
var fileBytes = await File.ReadAllBytesAsync(filePath); var fileBytes = await File.ReadAllBytesAsync(filePath);
base64Data = Convert.ToBase64String(fileBytes); base64Data = Convert.ToBase64String(fileBytes);
mediaType = receipt.ContentType; mediaType = receipt.ContentType;
} }
// Call Vision API with transaction name context // Build prompt
var promptText = await LoadPromptTemplateAsync();
var transactionName = receipt.Transaction?.Name; var transactionName = receipt.Transaction?.Name;
var parseData = isClaude if (!string.IsNullOrWhiteSpace(transactionName))
? await CallClaudeVisionAsync(apiKey, base64Data, mediaType, selectedModel, transactionName) {
: await CallOpenAIVisionAsync(apiKey, base64Data, mediaType, selectedModel, transactionName); promptText += $"\n\nNote: This transaction was recorded as \"{transactionName}\" in the bank statement, which may help identify the merchant if the receipt is unclear.";
}
promptText += "\n\nRespond ONLY with valid JSON, no other text.";
// Call appropriate vision API
var client = isClaude ? (IAIVisionClient)_claudeClient : _openAIClient;
var visionResult = await client.AnalyzeImageAsync(base64Data, mediaType, promptText, selectedModel);
if (!visionResult.IsSuccess)
{
parseLog.Error = visionResult.ErrorMessage;
parseLog.CompletedAtUtc = DateTime.UtcNow;
_db.ReceiptParseLogs.Add(parseLog);
await _db.SaveChangesAsync();
return ReceiptParseResult.Failure(visionResult.ErrorMessage!);
}
// Parse the JSON response
var parseData = string.IsNullOrWhiteSpace(visionResult.Content)
? new ParsedReceiptData()
: JsonSerializer.Deserialize<ParsedReceiptData>(visionResult.Content, new JsonSerializerOptions
{
PropertyNameCaseInsensitive = true
}) ?? new ParsedReceiptData();
// Update receipt with parsed data // Update receipt with parsed data
receipt.Merchant = parseData.Merchant; receipt.Merchant = parseData.Merchant;
@@ -118,7 +124,7 @@ namespace MoneyMap.Services
receipt.ReceiptDate = parseData.ReceiptDate; receipt.ReceiptDate = parseData.ReceiptDate;
receipt.DueDate = parseData.DueDate; receipt.DueDate = parseData.DueDate;
// Update transaction merchant if we extracted one and transaction doesn't have one yet // Update transaction merchant if extracted and transaction doesn't have one
if (receipt.Transaction != null && if (receipt.Transaction != null &&
!string.IsNullOrWhiteSpace(parseData.Merchant) && !string.IsNullOrWhiteSpace(parseData.Merchant) &&
receipt.Transaction.MerchantId == null) receipt.Transaction.MerchantId == null)
@@ -156,19 +162,19 @@ namespace MoneyMap.Services
_db.ReceiptParseLogs.Add(parseLog); _db.ReceiptParseLogs.Add(parseLog);
await _db.SaveChangesAsync(); await _db.SaveChangesAsync();
// Attempt auto-mapping after successful parse (only if receipt is not already mapped) // Attempt auto-mapping after successful parse
if (!receipt.TransactionId.HasValue) if (!receipt.TransactionId.HasValue)
{ {
try try
{ {
// Use service locator pattern to avoid circular dependency
using var scope = _serviceProvider.CreateScope(); using var scope = _serviceProvider.CreateScope();
var autoMapper = scope.ServiceProvider.GetRequiredService<IReceiptAutoMapper>(); var autoMapper = scope.ServiceProvider.GetRequiredService<IReceiptAutoMapper>();
await autoMapper.AutoMapReceiptAsync(receiptId); await autoMapper.AutoMapReceiptAsync(receiptId);
_logger.LogInformation("Auto-mapping completed for receipt {ReceiptId}", receiptId);
} }
catch catch (Exception ex)
{ {
// Ignore auto-mapping errors - parsing was successful _logger.LogWarning(ex, "Auto-mapping failed for receipt {ReceiptId}: {Message}", receiptId, ex.Message);
} }
} }
@@ -181,44 +187,11 @@ namespace MoneyMap.Services
_db.ReceiptParseLogs.Add(parseLog); _db.ReceiptParseLogs.Add(parseLog);
await _db.SaveChangesAsync(); await _db.SaveChangesAsync();
_logger.LogError(ex, "Error parsing receipt {ReceiptId}: {Message}", receiptId, ex.Message);
return ReceiptParseResult.Failure($"Error parsing receipt: {ex.Message}"); return ReceiptParseResult.Failure($"Error parsing receipt: {ex.Message}");
} }
} }
private async Task<string> ConvertPdfToBase64ImageAsync(string pdfPath)
{
return await Task.Run(() =>
{
var pdfBytes = File.ReadAllBytes(pdfPath);
// Render settings: 220 DPI for good quality
var settings = new MagickReadSettings
{
Density = new Density(220),
BackgroundColor = MagickColors.White,
ColorSpace = ColorSpace.sRGB
};
using var pages = new MagickImageCollection();
pages.Read(pdfBytes, settings);
// Use first page only
if (pages.Count == 0)
throw new Exception("PDF has no pages");
using var img = (MagickImage)pages[0].Clone();
// Ensure we have a clean 8-bit RGB canvas
img.ColorType = ColorType.TrueColor;
img.Alpha(AlphaOption.Remove); // flatten onto white
img.ResetPage();
// Convert to PNG bytes
var imageBytes = img.ToByteArray(MagickFormat.Png);
return Convert.ToBase64String(imageBytes);
});
}
private async Task<string> LoadPromptTemplateAsync() private async Task<string> LoadPromptTemplateAsync()
{ {
if (_promptTemplate != null) if (_promptTemplate != null)
@@ -232,179 +205,6 @@ namespace MoneyMap.Services
_promptTemplate = await File.ReadAllTextAsync(promptPath); _promptTemplate = await File.ReadAllTextAsync(promptPath);
return _promptTemplate; return _promptTemplate;
} }
private async Task<ParsedReceiptData> CallOpenAIVisionAsync(string apiKey, string base64Image, string mediaType, string model, string? transactionName = null)
{
// Load the prompt template from file
var promptText = await LoadPromptTemplateAsync();
// Add transaction context if available
if (!string.IsNullOrWhiteSpace(transactionName))
{
promptText += $"\n\nNote: This transaction was recorded as \"{transactionName}\" in the bank statement, which may help identify the merchant if the receipt is unclear.";
}
promptText += "\n\nRespond ONLY with valid JSON, no other text.";
var requestBody = new
{
model = model,
messages = new[]
{
new
{
role = "user",
content = new object[]
{
new
{
type = "text",
text = promptText
},
new
{
type = "image_url",
image_url = new
{
url = $"data:{mediaType};base64,{base64Image}"
}
}
}
}
},
max_tokens = 2000,
temperature = 0.1
};
_httpClient.DefaultRequestHeaders.Clear();
_httpClient.DefaultRequestHeaders.Add("Authorization", $"Bearer {apiKey}");
var json = JsonSerializer.Serialize(requestBody);
var content = new StringContent(json, Encoding.UTF8, "application/json");
var response = await _httpClient.PostAsync("https://api.openai.com/v1/chat/completions", content);
if (!response.IsSuccessStatusCode)
{
var errorContent = await response.Content.ReadAsStringAsync();
throw new Exception($"OpenAI API error ({response.StatusCode}): {errorContent}");
}
var responseJson = await response.Content.ReadAsStringAsync();
var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
var messageContent = responseObj
.GetProperty("choices")[0]
.GetProperty("message")
.GetProperty("content")
.GetString();
// Clean up the response - remove markdown code blocks if present
messageContent = messageContent?.Trim();
if (messageContent?.StartsWith("```json") == true)
{
messageContent = messageContent.Replace("```json", "").Replace("```", "").Trim();
}
if (string.IsNullOrWhiteSpace(messageContent))
{
return new ParsedReceiptData();
}
var parsedData = JsonSerializer.Deserialize<ParsedReceiptData>(messageContent, new JsonSerializerOptions
{
PropertyNameCaseInsensitive = true
});
return parsedData ?? new ParsedReceiptData();
}
private async Task<ParsedReceiptData> CallClaudeVisionAsync(string apiKey, string base64Image, string mediaType, string model, string? transactionName = null)
{
// Load the prompt template from file
var promptText = await LoadPromptTemplateAsync();
// Add transaction context if available
if (!string.IsNullOrWhiteSpace(transactionName))
{
promptText += $"\n\nNote: This transaction was recorded as \"{transactionName}\" in the bank statement, which may help identify the merchant if the receipt is unclear.";
}
promptText += "\n\nRespond ONLY with valid JSON, no other text.";
var requestBody = new
{
model = model,
max_tokens = 2000,
messages = new[]
{
new
{
role = "user",
content = new object[]
{
new
{
type = "image",
source = new
{
type = "base64",
media_type = mediaType,
data = base64Image
}
},
new
{
type = "text",
text = promptText
}
}
}
}
};
_httpClient.DefaultRequestHeaders.Clear();
_httpClient.DefaultRequestHeaders.Add("x-api-key", apiKey);
_httpClient.DefaultRequestHeaders.Add("anthropic-version", "2023-06-01");
var json = JsonSerializer.Serialize(requestBody);
var content = new StringContent(json, Encoding.UTF8, "application/json");
var response = await _httpClient.PostAsync("https://api.anthropic.com/v1/messages", content);
if (!response.IsSuccessStatusCode)
{
var errorContent = await response.Content.ReadAsStringAsync();
throw new Exception($"Anthropic API error ({response.StatusCode}): {errorContent}");
}
var responseJson = await response.Content.ReadAsStringAsync();
var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
var messageContent = responseObj
.GetProperty("content")[0]
.GetProperty("text")
.GetString();
// Clean up the response - remove markdown code blocks if present
messageContent = messageContent?.Trim();
if (messageContent?.StartsWith("```json") == true)
{
messageContent = messageContent.Replace("```json", "").Replace("```", "").Trim();
}
if (string.IsNullOrWhiteSpace(messageContent))
{
return new ParsedReceiptData();
}
var parsedData = JsonSerializer.Deserialize<ParsedReceiptData>(messageContent, new JsonSerializerOptions
{
PropertyNameCaseInsensitive = true
});
return parsedData ?? new ParsedReceiptData();
}
} }
public class ParsedReceiptData public class ParsedReceiptData

View File

@@ -0,0 +1,219 @@
using System.Text;
using System.Text.Json;
namespace MoneyMap.Services
{
/// <summary>
/// Outcome of a single AI vision API call: either the text the provider
/// returned, or a message describing why the call failed.
/// </summary>
public class VisionApiResult
{
    /// <summary>True for results built by <see cref="Success"/>, false for <see cref="Failure"/>.</summary>
    public bool IsSuccess { get; init; }

    /// <summary>Text returned by the provider; populated by <see cref="Success"/>.</summary>
    public string? Content { get; init; }

    /// <summary>Description of the failure; populated by <see cref="Failure"/>.</summary>
    public string? ErrorMessage { get; init; }

    /// <summary>Builds a successful result carrying <paramref name="content"/>.</summary>
    /// <param name="content">Text returned by the provider.</param>
    /// <returns>A result with <see cref="IsSuccess"/> set and no error message.</returns>
    public static VisionApiResult Success(string content)
    {
        var result = new VisionApiResult
        {
            Content = content,
            IsSuccess = true
        };
        return result;
    }

    /// <summary>Builds a failed result carrying <paramref name="error"/>.</summary>
    /// <param name="error">Human-readable description of what went wrong.</param>
    /// <returns>A result with <see cref="IsSuccess"/> cleared and no content.</returns>
    public static VisionApiResult Failure(string error)
    {
        var result = new VisionApiResult
        {
            ErrorMessage = error,
            IsSuccess = false
        };
        return result;
    }
}
/// <summary>
/// Client for making vision API calls to AI providers.
/// </summary>
public interface IAIVisionClient
{
/// <summary>
/// Sends an image together with a text prompt to the provider and returns the model's text reply.
/// </summary>
/// <param name="base64Image">Image data encoded as base64, without a data-URI prefix (implementations add any framing the provider needs).</param>
/// <param name="mediaType">MIME type of the image, e.g. <c>image/png</c>.</param>
/// <param name="prompt">Instruction text sent alongside the image.</param>
/// <param name="model">Provider-specific model identifier placed in the request body.</param>
/// <returns>
/// A <see cref="VisionApiResult"/>. The implementations in this file report a missing API key,
/// a non-success HTTP status, or an exception during the call as a failure result rather than throwing.
/// </returns>
Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model);
}
/// <summary>
/// OpenAI Vision API client. Posts a prompt plus a base64-encoded image to the
/// chat completions endpoint and returns the text of the first choice.
/// </summary>
public class OpenAIVisionClient : IAIVisionClient
{
    private const string Endpoint = "https://api.openai.com/v1/chat/completions";

    private readonly HttpClient _httpClient;
    private readonly IConfiguration _configuration;
    private readonly ILogger<OpenAIVisionClient> _logger;

    /// <summary>Creates the client from injected HTTP, configuration and logging services.</summary>
    public OpenAIVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<OpenAIVisionClient> logger)
    {
        _httpClient = httpClient;
        _configuration = configuration;
        _logger = logger;
    }

    /// <summary>
    /// Sends <paramref name="prompt"/> and the image to OpenAI and returns the reply text
    /// with any surrounding markdown code fence removed.
    /// </summary>
    /// <param name="base64Image">Image data encoded as base64 (no data-URI prefix).</param>
    /// <param name="mediaType">MIME type of the image; used to build the data URI.</param>
    /// <param name="prompt">Instruction text sent alongside the image.</param>
    /// <param name="model">OpenAI model identifier.</param>
    /// <returns>
    /// A success result with the cleaned reply, or a failure result when the API key is
    /// missing, the API returns a non-success status, or the call throws.
    /// </returns>
    public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
    {
        var apiKey = ResolveApiKey();
        if (apiKey is null)
            return VisionApiResult.Failure("OpenAI API key not configured. Set OPENAI_API_KEY environment variable or OpenAI:ApiKey in appsettings.json");

        var requestBody = new
        {
            model = model,
            messages = new[]
            {
                new
                {
                    role = "user",
                    content = new object[]
                    {
                        new { type = "text", text = prompt },
                        new
                        {
                            type = "image_url",
                            image_url = new { url = $"data:{mediaType};base64,{base64Image}" }
                        }
                    }
                }
            },
            max_tokens = 2000,
            temperature = 0.1
        };

        try
        {
            // Headers go on the request, not on HttpClient.DefaultRequestHeaders: mutating the
            // defaults is unsafe while other requests are in flight on the same client and
            // would discard any defaults configured where the client is registered.
            using var request = new HttpRequestMessage(HttpMethod.Post, Endpoint)
            {
                Content = new StringContent(JsonSerializer.Serialize(requestBody), Encoding.UTF8, "application/json")
            };
            request.Headers.Add("Authorization", $"Bearer {apiKey}");

            using var response = await _httpClient.SendAsync(request);

            if (!response.IsSuccessStatusCode)
            {
                var errorContent = await response.Content.ReadAsStringAsync();
                _logger.LogError("OpenAI API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                return VisionApiResult.Failure($"OpenAI API error ({response.StatusCode}): {errorContent}");
            }

            var responseJson = await response.Content.ReadAsStringAsync();
            var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
            var messageContent = responseObj
                .GetProperty("choices")[0]
                .GetProperty("message")
                .GetProperty("content")
                .GetString();

            return VisionApiResult.Success(CleanJsonResponse(messageContent));
        }
        catch (Exception ex)
        {
            // Boundary catch: network errors, timeouts and unexpected response shapes are
            // all reported to the caller as a failure result instead of propagating.
            _logger.LogError(ex, "OpenAI Vision API call failed: {Message}", ex.Message);
            return VisionApiResult.Failure($"OpenAI API error: {ex.Message}");
        }
    }

    /// <summary>
    /// Reads the API key from the OPENAI_API_KEY environment variable, falling back to the
    /// OpenAI:ApiKey configuration value when the variable is unset or blank.
    /// </summary>
    /// <returns>The trimmed key, or null when neither source provides one.</returns>
    private string? ResolveApiKey()
    {
        var fromEnvironment = Environment.GetEnvironmentVariable("OPENAI_API_KEY");
        if (!string.IsNullOrWhiteSpace(fromEnvironment))
            return fromEnvironment.Trim();

        var fromConfiguration = _configuration["OpenAI:ApiKey"];
        return string.IsNullOrWhiteSpace(fromConfiguration) ? null : fromConfiguration.Trim();
    }

    /// <summary>
    /// Trims the reply and, when it is wrapped in a markdown code fence (with or without a
    /// "json" language tag), removes the fence markers. Null input yields an empty string.
    /// </summary>
    private static string CleanJsonResponse(string? content)
    {
        var trimmed = content?.Trim() ?? "";
        if (trimmed.StartsWith("```", StringComparison.Ordinal))
        {
            trimmed = trimmed
                .Replace("```json", "", StringComparison.OrdinalIgnoreCase)
                .Replace("```", "", StringComparison.Ordinal)
                .Trim();
        }
        return trimmed;
    }
}
/// <summary>
/// Anthropic Claude Vision API client. Posts a base64-encoded image plus a prompt to the
/// messages endpoint and returns the text of the first content block.
/// </summary>
public class ClaudeVisionClient : IAIVisionClient
{
    private const string Endpoint = "https://api.anthropic.com/v1/messages";
    private const string AnthropicVersion = "2023-06-01";

    private readonly HttpClient _httpClient;
    private readonly IConfiguration _configuration;
    private readonly ILogger<ClaudeVisionClient> _logger;

    /// <summary>Creates the client from injected HTTP, configuration and logging services.</summary>
    public ClaudeVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<ClaudeVisionClient> logger)
    {
        _httpClient = httpClient;
        _configuration = configuration;
        _logger = logger;
    }

    /// <summary>
    /// Sends the image and <paramref name="prompt"/> to Anthropic and returns the reply text
    /// with any surrounding markdown code fence removed.
    /// </summary>
    /// <param name="base64Image">Image data encoded as base64.</param>
    /// <param name="mediaType">MIME type of the image, sent as the source media type.</param>
    /// <param name="prompt">Instruction text sent alongside the image.</param>
    /// <param name="model">Anthropic model identifier.</param>
    /// <returns>
    /// A success result with the cleaned reply, or a failure result when the API key is
    /// missing, the API returns a non-success status, or the call throws.
    /// </returns>
    public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
    {
        var apiKey = ResolveApiKey();
        if (apiKey is null)
            return VisionApiResult.Failure("Anthropic API key not configured. Set ANTHROPIC_API_KEY environment variable or Anthropic:ApiKey in appsettings.json");

        var requestBody = new
        {
            model = model,
            max_tokens = 2000,
            messages = new[]
            {
                new
                {
                    role = "user",
                    content = new object[]
                    {
                        new
                        {
                            type = "image",
                            source = new
                            {
                                type = "base64",
                                media_type = mediaType,
                                data = base64Image
                            }
                        },
                        new { type = "text", text = prompt }
                    }
                }
            }
        };

        try
        {
            // Headers go on the request, not on HttpClient.DefaultRequestHeaders: mutating the
            // defaults is unsafe while other requests are in flight on the same client and
            // would discard any defaults configured where the client is registered.
            using var request = new HttpRequestMessage(HttpMethod.Post, Endpoint)
            {
                Content = new StringContent(JsonSerializer.Serialize(requestBody), Encoding.UTF8, "application/json")
            };
            request.Headers.Add("x-api-key", apiKey);
            request.Headers.Add("anthropic-version", AnthropicVersion);

            using var response = await _httpClient.SendAsync(request);

            if (!response.IsSuccessStatusCode)
            {
                var errorContent = await response.Content.ReadAsStringAsync();
                _logger.LogError("Anthropic API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                return VisionApiResult.Failure($"Anthropic API error ({response.StatusCode}): {errorContent}");
            }

            var responseJson = await response.Content.ReadAsStringAsync();
            var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
            var messageContent = responseObj
                .GetProperty("content")[0]
                .GetProperty("text")
                .GetString();

            return VisionApiResult.Success(CleanJsonResponse(messageContent));
        }
        catch (Exception ex)
        {
            // Boundary catch: network errors, timeouts and unexpected response shapes are
            // all reported to the caller as a failure result instead of propagating.
            _logger.LogError(ex, "Claude Vision API call failed: {Message}", ex.Message);
            return VisionApiResult.Failure($"Anthropic API error: {ex.Message}");
        }
    }

    /// <summary>
    /// Reads the API key from the ANTHROPIC_API_KEY environment variable, falling back to the
    /// Anthropic:ApiKey configuration value when the variable is unset or blank.
    /// </summary>
    /// <returns>The trimmed key, or null when neither source provides one.</returns>
    private string? ResolveApiKey()
    {
        var fromEnvironment = Environment.GetEnvironmentVariable("ANTHROPIC_API_KEY");
        if (!string.IsNullOrWhiteSpace(fromEnvironment))
            return fromEnvironment.Trim();

        var fromConfiguration = _configuration["Anthropic:ApiKey"];
        return string.IsNullOrWhiteSpace(fromConfiguration) ? null : fromConfiguration.Trim();
    }

    /// <summary>
    /// Trims the reply and, when it is wrapped in a markdown code fence (with or without a
    /// "json" language tag), removes the fence markers. Null input yields an empty string.
    /// </summary>
    private static string CleanJsonResponse(string? content)
    {
        var trimmed = content?.Trim() ?? "";
        if (trimmed.StartsWith("```", StringComparison.Ordinal))
        {
            trimmed = trimmed
                .Replace("```json", "", StringComparison.OrdinalIgnoreCase)
                .Replace("```", "", StringComparison.Ordinal)
                .Trim();
        }
        return trimmed;
    }
}
}

View File

@@ -0,0 +1,61 @@
using ImageMagick;
namespace MoneyMap.Services
{
/// <summary>
/// Service for converting PDF files to images for AI processing.
/// </summary>
public interface IPdfToImageConverter
{
/// <summary>
/// Converts the first page of a PDF to a base64-encoded PNG image.
/// </summary>
/// <param name="pdfPath">Path of the PDF file to read.</param>
/// <returns>The first page rendered as PNG and encoded as a base64 string.</returns>
Task<string> ConvertFirstPageToBase64Async(string pdfPath);
/// <summary>
/// Converts the first page of an in-memory PDF to a base64-encoded PNG image.
/// </summary>
/// <param name="pdfBytes">Raw bytes of the PDF document.</param>
/// <returns>The first page rendered as PNG and encoded as a base64 string.</returns>
Task<string> ConvertFirstPageToBase64Async(byte[] pdfBytes);
}
/// <summary>
/// ImageMagick-based converter that renders the first page of a PDF to a PNG
/// and returns it as a base64 string.
/// </summary>
public class PdfToImageConverter : IPdfToImageConverter
{
    // Render resolution used when rasterising the PDF page.
    private const int DefaultDpi = 220;

    /// <summary>
    /// Reads the PDF at <paramref name="pdfPath"/> and converts its first page to a base64-encoded PNG.
    /// </summary>
    /// <param name="pdfPath">Path of the PDF file to read.</param>
    /// <returns>The first page rendered as PNG and encoded as a base64 string.</returns>
    /// <remarks>
    /// The file is read asynchronously so the caller's thread is not blocked, and any I/O
    /// failure surfaces through the returned task rather than being thrown synchronously.
    /// </remarks>
    public async Task<string> ConvertFirstPageToBase64Async(string pdfPath)
    {
        var pdfBytes = await File.ReadAllBytesAsync(pdfPath);
        return await ConvertFirstPageToBase64Async(pdfBytes);
    }

    /// <summary>
    /// Converts the first page of an in-memory PDF to a base64-encoded PNG.
    /// </summary>
    /// <param name="pdfBytes">Raw bytes of the PDF document.</param>
    /// <returns>The first page rendered as PNG and encoded as a base64 string.</returns>
    /// <exception cref="InvalidOperationException">The PDF produced no pages.</exception>
    public Task<string> ConvertFirstPageToBase64Async(byte[] pdfBytes)
    {
        // Rasterisation is CPU-bound, so it is pushed onto the thread pool.
        return Task.Run(() =>
        {
            var settings = new MagickReadSettings
            {
                Density = new Density(DefaultDpi),
                BackgroundColor = MagickColors.White,
                ColorSpace = ColorSpace.sRGB,
                // Only the first page is used, so only the first page is rendered;
                // this avoids rasterising every page of a long document.
                FrameIndex = 0,
                FrameCount = 1
            };

            using var pages = new MagickImageCollection();
            pages.Read(pdfBytes, settings);

            if (pages.Count == 0)
                throw new InvalidOperationException("PDF has no pages");

            using var img = (MagickImage)pages[0].Clone();

            // Ensure we have a clean 8-bit RGB canvas
            img.ColorType = ColorType.TrueColor;
            img.Alpha(AlphaOption.Remove); // flatten onto white
            img.ResetPage();

            // Convert to PNG bytes
            var imageBytes = img.ToByteArray(MagickFormat.Png);
            return Convert.ToBase64String(imageBytes);
        });
    }
}
}