// File: MoneyMap/MoneyMap.Core/Services/AIReceiptParser.cs
// Last modified: 2026-04-20 18:18:20 -04:00 (467 lines, 20 KiB, C#)

using Microsoft.EntityFrameworkCore;
using MoneyMap.Data;
using MoneyMap.Models;
using MoneyMap.Services.AITools;
using System.Text.Json;
using System.Text.Json.Serialization;
namespace MoneyMap.Services
{
/// <summary>
/// Contract for parsing a stored receipt and reporting the outcome as a <see cref="ReceiptParseResult"/>.
/// </summary>
public interface IReceiptParser
{
/// <summary>
/// Parses the receipt identified by <paramref name="receiptId"/>.
/// </summary>
/// <param name="receiptId">Identifier of the receipt to parse.</param>
/// <param name="model">
/// Optional model name. When null, the <see cref="AIReceiptParser"/> implementation falls back to the
/// "AI:ReceiptParsingModel" configuration value and then to "gpt-4o-mini".
/// </param>
/// <param name="notes">
/// Optional notes for this parse. When null, the <see cref="AIReceiptParser"/> implementation uses the
/// notes already stored on the receipt.
/// </param>
/// <returns>A result whose <c>IsSuccess</c> flag and <c>Message</c> describe the outcome.</returns>
Task<ReceiptParseResult> ParseReceiptAsync(long receiptId, string? model = null, string? notes = null);
}
public class AIReceiptParser : IReceiptParser
{
private readonly MoneyMapContext _db;
private readonly IReceiptManager _receiptManager;
private readonly IPdfToImageConverter _pdfConverter;
private readonly IAIVisionClientResolver _clientResolver;
private readonly IMerchantService _merchantService;
private readonly IAIToolExecutor _toolExecutor;
private readonly IServiceProvider _serviceProvider;
private readonly IConfiguration _configuration;
private readonly ILogger<AIReceiptParser> _logger;
private string? _promptTemplate;
/// <summary>
/// Creates the parser from its injected collaborators; each argument is stored in the
/// private field of the same name.
/// </summary>
public AIReceiptParser(
    MoneyMapContext db,
    IReceiptManager receiptManager,
    IPdfToImageConverter pdfConverter,
    IAIVisionClientResolver clientResolver,
    IMerchantService merchantService,
    IAIToolExecutor toolExecutor,
    IServiceProvider serviceProvider,
    IConfiguration configuration,
    ILogger<AIReceiptParser> logger)
{
    // Data access and receipt file handling.
    (_db, _receiptManager, _pdfConverter) = (db, receiptManager, pdfConverter);

    // AI client selection, merchant lookup and tool execution.
    (_clientResolver, _merchantService, _toolExecutor) = (clientResolver, merchantService, toolExecutor);

    // Hosting infrastructure.
    (_serviceProvider, _configuration, _logger) = (serviceProvider, configuration, logger);
}
/// <summary>
/// Parses a stored receipt with an AI vision model and persists the extracted data.
/// </summary>
/// <remarks>
/// Steps: load the receipt together with its transaction, verify the file exists on disk, resolve the
/// vision client for the selected model, send image and prompt, apply the parsed data, write a parse
/// log, and finally attempt to map the receipt to a transaction. Errors raised inside the parsing
/// pipeline are logged and returned as a failed result instead of being thrown; the receipt lookup and
/// client resolution run before the try block and can still throw.
/// </remarks>
/// <param name="receiptId">Identifier of the receipt to parse.</param>
/// <param name="model">
/// Model name; when null the "AI:ReceiptParsingModel" configuration value is used, then "gpt-4o-mini".
/// </param>
/// <param name="notes">Notes for this parse; when null the receipt's stored ParsingNotes are used.</param>
/// <returns>A success result with the number of parsed line items, or a failure result with the reason.</returns>
public async Task<ReceiptParseResult> ParseReceiptAsync(long receiptId, string? model = null, string? notes = null)
{
    var receipt = await _db.Receipts
        .Include(r => r.Transaction)
        .FirstOrDefaultAsync(r => r.Id == receiptId);
    if (receipt == null)
        return ReceiptParseResult.Failure("Receipt not found.");

    var filePath = _receiptManager.GetReceiptPhysicalPath(receipt);
    if (!File.Exists(filePath))
        return ReceiptParseResult.Failure("Receipt file not found on disk.");

    // Fall back to receipt.ParsingNotes if notes parameter is null
    var effectiveNotes = notes ?? receipt.ParsingNotes;
    var selectedModel = model ?? _configuration["AI:ReceiptParsingModel"] ?? "gpt-4o-mini";
    var (client, provider) = _clientResolver.Resolve(selectedModel);

    // Let model-aware clients evaluate tool support for the specific model
    if (client is LlamaCppVisionClient llamaCpp)
        llamaCpp.SetCurrentModel(selectedModel);

    var parseLog = new ReceiptParseLog
    {
        ReceiptId = receiptId,
        Provider = provider,
        Model = selectedModel,
        StartedAtUtc = DateTime.UtcNow,
        Success = false
    };

    try
    {
        var (base64Data, mediaType) = await PrepareImageDataAsync(receipt, filePath);
        var promptText = await BuildPromptAsync(receipt, effectiveNotes, client);
        var visionResult = await CallVisionClientAsync(client, base64Data, mediaType, promptText, selectedModel);
        if (!visionResult.IsSuccess)
        {
            // Never hand a null message to the log or the caller when the client gave no details.
            var visionError = visionResult.ErrorMessage ?? "Vision client reported a failure without an error message.";
            await SaveParseLogAsync(parseLog, visionError);
            return ReceiptParseResult.Failure(visionError);
        }

        var parseData = ParseResponse(visionResult.Content);
        await ApplyParseResultAsync(receipt, receiptId, parseData, effectiveNotes);

        parseLog.Success = true;
        parseLog.Confidence = parseData.Confidence;
        parseLog.RawProviderPayloadJson = JsonSerializer.Serialize(parseData);
        await SaveParseLogAsync(parseLog);

        await TryAutoMapReceiptAsync(receipt, receiptId, parseData.SuggestedTransactionId);

        var lineCount = parseData.LineItems.Count;
        return ReceiptParseResult.Success($"Parsed {lineCount} line items from receipt.");
    }
    catch (Exception ex)
    {
        // Log first so the original error is recorded even if persisting the parse log fails.
        _logger.LogError(ex, "Error parsing receipt {ReceiptId}: {Message}", receiptId, ex.Message);

        try
        {
            await SaveParseLogAsync(parseLog, ex.Message);
        }
        catch (Exception logEx)
        {
            // Saving the log can fail for the same reason the parse failed (e.g. a database error).
            // Swallow it here so the caller still receives the original failure as a result.
            _logger.LogWarning(logEx, "Failed to save parse log for receipt {ReceiptId}", receiptId);
        }

        return ReceiptParseResult.Failure($"Error parsing receipt: {ex.Message}");
    }
}
/// <summary>
/// Sends the image and prompt to the vision client. A client that implements
/// <see cref="IAIToolAwareVisionClient"/> and reports tool support receives the registered tools and a
/// tool-call callback (at most 5 tool rounds); every other client gets a plain image analysis call.
/// </summary>
private async Task<VisionApiResult> CallVisionClientAsync(
    IAIVisionClient client, string base64Data, string mediaType, string prompt, string model)
{
    if (client is not IAIToolAwareVisionClient { SupportsToolUse: true } toolClient)
    {
        // Fallback: standard call (BuildPromptAsync adds the enriched database context for these clients)
        _logger.LogInformation("Using standard vision client for model {Model} (no tool use)", model);
        return await client.AnalyzeImageAsync(base64Data, mediaType, prompt, model);
    }

    _logger.LogInformation("Using tool-aware vision client for model {Model}", model);
    var availableTools = AIToolRegistry.GetAllTools();
    return await toolClient.AnalyzeImageWithToolsAsync(
        base64Data,
        mediaType,
        prompt,
        model,
        availableTools,
        call => _toolExecutor.ExecuteAsync(call),
        maxToolRounds: 5);
}
/// <summary>
/// Loads the receipt file as base64 image data for the vision client.
/// </summary>
/// <param name="receipt">Receipt whose ContentType decides how the file is read.</param>
/// <param name="filePath">Physical path of the receipt file.</param>
/// <returns>
/// For PDFs: the first page rendered by the PDF converter, reported as "image/png" (later pages are not
/// sent). For anything else: the raw file bytes with the receipt's own content type.
/// </returns>
private async Task<(string Base64Data, string MediaType)> PrepareImageDataAsync(Receipt receipt, string filePath)
{
    // MIME type names are case-insensitive, so "Application/PDF" must take the PDF path as well
    // instead of being sent to the model as if it were an image.
    if (string.Equals(receipt.ContentType, "application/pdf", StringComparison.OrdinalIgnoreCase))
    {
        var base64 = await _pdfConverter.ConvertFirstPageToBase64Async(filePath);
        return (base64, "image/png");
    }

    var fileBytes = await File.ReadAllBytesAsync(filePath);
    return (Convert.ToBase64String(fileBytes), receipt.ContentType);
}
/// <summary>
/// Assembles the prompt sent to the vision client: the prompt template, optional hints (bank
/// transaction name, configured notes, user notes), then either tool-use instructions or pre-fetched
/// database context depending on the client's tool support, and finally the JSON-only instruction.
/// </summary>
/// <param name="receipt">Receipt being parsed; its transaction name, date, total and merchant feed the hints.</param>
/// <param name="userNotes">Optional notes for this receipt.</param>
/// <param name="client">Client that will receive the prompt; decides which instruction block is added.</param>
private async Task<string> BuildPromptAsync(Receipt receipt, string? userNotes, IAIVisionClient client)
{
    // Sections are collected in order and concatenated once at the end.
    var sections = new List<string> { await LoadPromptTemplateAsync() };

    var bankStatementName = receipt.Transaction?.Name;
    if (!string.IsNullOrWhiteSpace(bankStatementName))
        sections.Add($"\n\nNote: This transaction was recorded as \"{bankStatementName}\" in the bank statement, which may help identify the merchant if the receipt is unclear.");

    var configuredNotes = _configuration["AI:ReceiptParsingNotes"];
    if (!string.IsNullOrWhiteSpace(configuredNotes))
        sections.Add($"\n\nAdditional notes: {configuredNotes}");

    if (!string.IsNullOrWhiteSpace(userNotes))
        sections.Add($"\n\nUser notes for this receipt: {userNotes}");

    // Add tool-use or enriched context instructions based on client capability
    if (client is IAIToolAwareVisionClient { SupportsToolUse: true })
    {
        // Tool-aware client: instruct to use tools for lookups
        sections.Add(@"
TOOL USE INSTRUCTIONS:
You have access to tools that can query the application's database. You MUST call them before generating your JSON response:
1. Call search_categories to find existing category names. Use ONLY categories returned by this tool for suggestedCategory and line item category fields. Do not invent new category names.
2. Call search_transactions to find a matching bank transaction for this receipt (search by date, amount, merchant name). Set suggestedTransactionId to the numeric ID of the best match, or null if no good match. Remember: suggestedTransactionId must be a JSON integer or null, never a string.
3. Call search_merchants to look up the correct merchant name.");
    }
    else
    {
        // Client without tool use: inject pre-fetched database context. Best effort only:
        // if the lookup fails, the prompt goes out without it.
        try
        {
            var enrichedContext = await _toolExecutor.GetEnrichedContextAsync(
                receipt.ReceiptDate,
                receipt.Total,
                receipt.Transaction?.Name ?? receipt.Merchant);

            sections.Add($"\n\n{enrichedContext}");
            sections.Add(@"
Using the database context above, populate these fields in your JSON response:
- suggestedCategory: Use the best matching category name from the EXISTING CATEGORIES list. Do not invent new categories.
- suggestedTransactionId: Use the numeric transaction ID from CANDIDATE TRANSACTIONS that best matches this receipt, or null if none match. Must be a JSON integer or null, never a string.
- For each line item, set category to the best matching category from the EXISTING CATEGORIES list.");
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to get enriched context for Ollama, proceeding without it");
        }
    }

    sections.Add("\n\nRespond ONLY with valid JSON, no other text.");
    return string.Concat(sections);
}
/// <summary>
/// Serializer options for model responses. Created once and reused, because building a new
/// <see cref="JsonSerializerOptions"/> per call discards the serializer's cached metadata.
/// </summary>
private static readonly JsonSerializerOptions _responseJsonOptions = new()
{
    PropertyNameCaseInsensitive = true
};

/// <summary>
/// Converts the model's text response into <see cref="ParsedReceiptData"/>.
/// </summary>
/// <param name="content">Raw response text; may be null, blank, fenced in markdown, or surrounded by prose.</param>
/// <returns>
/// The deserialized data, or an empty <see cref="ParsedReceiptData"/> when the content is blank or
/// deserializes to null. <c>LineItems</c> is never null and never contains null entries.
/// </returns>
/// <exception cref="JsonException">The extracted payload is not valid JSON for the target type.</exception>
private static ParsedReceiptData ParseResponse(string? content)
{
    if (string.IsNullOrWhiteSpace(content))
        return new ParsedReceiptData();

    var json = ExtractJsonObject(content);
    var data = JsonSerializer.Deserialize<ParsedReceiptData>(json, _responseJsonOptions)
               ?? new ParsedReceiptData();

    // An explicit "lineItems": null (or a null entry in the array) overrides the property's default
    // and would otherwise surface later as a NullReferenceException when the items are enumerated.
    data.LineItems ??= new List<ParsedLineItem>();
    data.LineItems.RemoveAll(item => item is null);
    return data;
}

/// <summary>
/// Returns the outermost <c>{ ... }</c> span of <paramref name="content"/>, so replies wrapped in a
/// markdown code fence or preceded/followed by prose still deserialize. Text that is already a bare
/// object, or that contains no object at all, is returned trimmed but otherwise unchanged.
/// </summary>
private static string ExtractJsonObject(string content)
{
    var text = content.Trim();
    if (text.StartsWith('{') && text.EndsWith('}'))
        return text;

    var start = text.IndexOf('{');
    var end = text.LastIndexOf('}');
    return start >= 0 && end > start ? text[start..(end + 1)] : text;
}
/// <summary>
/// Writes the parsed data to the database: receipt header fields, the linked transaction's merchant
/// and category (each only when currently unset), and a full replacement of the receipt's line items.
/// All changes are saved with a single SaveChangesAsync call at the end.
/// </summary>
/// <param name="receipt">Tracked receipt entity to update.</param>
/// <param name="receiptId">Identifier used to find and create line items.</param>
/// <param name="parseData">Data extracted from the model response.</param>
/// <param name="notes">Notes that were used for this parse; stored on the receipt.</param>
private async Task ApplyParseResultAsync(Receipt receipt, long receiptId, ParsedReceiptData parseData, string? notes)
{
    // Receipt header
    receipt.ParsingNotes = notes;
    receipt.Merchant = parseData.Merchant;
    receipt.Total = parseData.Total;
    receipt.Subtotal = parseData.Subtotal;
    receipt.Tax = parseData.Tax;
    receipt.ReceiptDate = parseData.ReceiptDate;
    receipt.DueDate = parseData.DueDate;

    if (receipt.Transaction is { } linkedTransaction)
    {
        // Fill in the merchant only when the transaction has none yet
        if (!string.IsNullOrWhiteSpace(parseData.Merchant) && linkedTransaction.MerchantId == null)
        {
            linkedTransaction.MerchantId = await _merchantService.GetOrCreateIdAsync(parseData.Merchant);
        }

        // Fill in the category only when the AI suggested one and the transaction has none yet
        if (!string.IsNullOrWhiteSpace(parseData.SuggestedCategory) &&
            string.IsNullOrWhiteSpace(linkedTransaction.Category))
        {
            linkedTransaction.Category = parseData.SuggestedCategory;
            _logger.LogInformation("Set transaction {TransactionId} category to '{Category}' from AI suggestion",
                linkedTransaction.Id, parseData.SuggestedCategory);
        }
    }

    // Drop the previously stored line items for this receipt
    var staleItems = await _db.ReceiptLineItems
        .Where(li => li.ReceiptId == receiptId)
        .ToListAsync();
    _db.ReceiptLineItems.RemoveRange(staleItems);

    // Rebuild them from the parse result, numbering lines from 1 in response order
    var freshItems = new List<ReceiptLineItem>(parseData.LineItems.Count);
    for (var position = 0; position < parseData.LineItems.Count; position++)
    {
        var parsed = parseData.LineItems[position];
        freshItems.Add(new ReceiptLineItem
        {
            ReceiptId = receiptId,
            LineNumber = position + 1,
            Description = parsed.Description,
            Sku = parsed.Upc,
            Quantity = parsed.Quantity,
            UnitPrice = parsed.UnitPrice,
            LineTotal = parsed.LineTotal,
            Category = parsed.Category,
            Voided = parsed.Voided
        });
    }
    _db.ReceiptLineItems.AddRange(freshItems);

    await _db.SaveChangesAsync();
}
/// <summary>
/// Finalizes a parse log entry (completion time and optional error text), adds it to the context and
/// saves. Because it calls SaveChangesAsync, any other pending changes on the context are saved too.
/// </summary>
/// <param name="parseLog">Log entry to finalize and persist.</param>
/// <param name="error">Error text to record, or null when there is none.</param>
private async Task SaveParseLogAsync(ReceiptParseLog parseLog, string? error = null)
{
    parseLog.CompletedAtUtc = DateTime.UtcNow;
    parseLog.Error = error;

    _db.ReceiptParseLogs.Add(parseLog);
    await _db.SaveChangesAsync();
}
/// <summary>
/// Best-effort mapping of the receipt to a transaction. The AI-suggested transaction is tried first
/// when the receipt is unmapped; if that does not succeed and the receipt is still unmapped, the
/// registered <see cref="IReceiptAutoMapper"/> is run in its own service scope. Failures are logged
/// as warnings and never thrown.
/// </summary>
/// <param name="receipt">Receipt entity; its TransactionId decides whether mapping is needed.</param>
/// <param name="receiptId">Identifier of the receipt to map.</param>
/// <param name="suggestedTransactionId">Transaction id suggested by the model, or null.</param>
private async Task TryAutoMapReceiptAsync(Receipt receipt, long receiptId, long? suggestedTransactionId)
{
    // If AI suggested a specific transaction, try mapping directly
    if (!receipt.TransactionId.HasValue
        && suggestedTransactionId is long candidateId
        && await TryMapSuggestedTransactionAsync(receiptId, candidateId))
    {
        return;
    }

    // Fall back to the existing auto-mapper. Checked again here because the attempt above may have
    // run (and touched the receipt) without reporting success.
    if (receipt.TransactionId.HasValue)
        return;

    try
    {
        using var scope = _serviceProvider.CreateScope();
        var autoMapper = scope.ServiceProvider.GetRequiredService<IReceiptAutoMapper>();
        await autoMapper.AutoMapReceiptAsync(receiptId);
        _logger.LogInformation("Auto-mapping completed for receipt {ReceiptId}", receiptId);
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, "Auto-mapping failed for receipt {ReceiptId}: {Message}", receiptId, ex.Message);
    }
}

/// <summary>
/// Maps the receipt to the given transaction when that transaction exists and no other receipt is
/// already mapped to it. Returns true only when the mapping was made; exceptions are logged and
/// reported as false.
/// </summary>
private async Task<bool> TryMapSuggestedTransactionAsync(long receiptId, long transactionId)
{
    try
    {
        var transaction = await _db.Transactions.FindAsync(transactionId);
        if (transaction == null)
            return false;

        // Verify the transaction isn't already mapped to another receipt
        var takenByAnotherReceipt = await _db.Receipts
            .AnyAsync(r => r.TransactionId == transactionId && r.Id != receiptId);
        if (takenByAnotherReceipt)
            return false;

        if (!await _receiptManager.MapReceiptToTransactionAsync(receiptId, transactionId))
            return false;

        _logger.LogInformation(
            "AI-suggested mapping: receipt {ReceiptId} → transaction {TransactionId}",
            receiptId, transactionId);
        return true;
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, "AI-suggested mapping failed for receipt {ReceiptId} → transaction {TransactionId}",
            receiptId, transactionId);
        return false;
    }
}
/// <summary>
/// Returns the receipt parser prompt template, reading "Prompts/ReceiptParserPrompt.txt" under the
/// application base directory on first use and keeping it in a field of this instance afterwards.
/// </summary>
/// <exception cref="FileNotFoundException">The template file does not exist.</exception>
private async Task<string> LoadPromptTemplateAsync()
{
    if (_promptTemplate is { } cachedTemplate)
        return cachedTemplate;

    var promptPath = Path.Combine(AppContext.BaseDirectory, "Prompts", "ReceiptParserPrompt.txt");
    if (File.Exists(promptPath))
    {
        var loadedTemplate = await File.ReadAllTextAsync(promptPath);
        _promptTemplate = loadedTemplate;
        return loadedTemplate;
    }

    throw new FileNotFoundException($"Receipt parser prompt template not found at: {promptPath}");
}
}
/// <summary>
/// Resolves the appropriate AI vision client based on model name.
/// </summary>
public interface IAIVisionClientResolver
{
/// <summary>
/// Picks the vision client for <paramref name="model"/>.
/// </summary>
/// <param name="model">Model name, including any provider prefix it carries.</param>
/// <returns>The client to call and the provider name to record alongside it.</returns>
(IAIVisionClient Client, string Provider) Resolve(string model);
}
/// <summary>
/// Resolver that selects the vision client from the prefix of the model name.
/// </summary>
public class AIVisionClientResolver : IAIVisionClientResolver
{
    private readonly OpenAIVisionClient _openAIClient;
    private readonly ClaudeVisionClient _claudeClient;
    private readonly OllamaVisionClient _ollamaClient;
    private readonly LlamaCppVisionClient _llamaCppClient;

    /// <summary>
    /// Creates the resolver with one client instance per supported provider.
    /// </summary>
    public AIVisionClientResolver(
        OpenAIVisionClient openAIClient,
        ClaudeVisionClient claudeClient,
        OllamaVisionClient ollamaClient,
        LlamaCppVisionClient llamaCppClient)
    {
        _openAIClient = openAIClient;
        _claudeClient = claudeClient;
        _ollamaClient = ollamaClient;
        _llamaCppClient = llamaCppClient;
    }

    /// <summary>
    /// Maps a model name to its client: "llamacpp:" → LlamaCpp, "ollama:" → Ollama,
    /// "claude-" → Anthropic, anything else → OpenAI.
    /// </summary>
    /// <param name="model">Model name to inspect.</param>
    /// <returns>The matching client and the provider name.</returns>
    public (IAIVisionClient Client, string Provider) Resolve(string model)
    {
        // Prefixes are identifiers, not linguistic text: compare ordinally so the result does not
        // depend on the current culture's comparison rules.
        if (model.StartsWith("llamacpp:", StringComparison.Ordinal))
            return (_llamaCppClient, "LlamaCpp");
        if (model.StartsWith("ollama:", StringComparison.Ordinal))
            return (_ollamaClient, "Ollama");
        if (model.StartsWith("claude-", StringComparison.Ordinal))
            return (_claudeClient, "Anthropic");

        return (_openAIClient, "OpenAI");
    }
}
/// <summary>
/// Shape of the JSON the model is asked to return for a receipt. Instances are produced by
/// deserializing the model response and are also serialized into the parse log.
/// </summary>
public class ParsedReceiptData
{
// Merchant name read from the receipt; copied to the receipt and used to look up or create a merchant.
public string? Merchant { get; set; }
// Receipt date and due date; both copied to the receipt as-is.
public DateTime? ReceiptDate { get; set; }
public DateTime? DueDate { get; set; }
// Monetary amounts; all copied to the receipt as-is.
public decimal? Subtotal { get; set; }
public decimal? Tax { get; set; }
public decimal? Total { get; set; }
// Stored on the parse log; defaults to 0.5 when the response does not provide it.
public decimal Confidence { get; set; } = 0.5m;
// Category suggestion; applied to the linked transaction only when it has no category yet.
public string? SuggestedCategory { get; set; }
// Transaction the receipt may belong to. The converter tolerates string values such as "123" or "N/A".
[JsonConverter(typeof(NullableLongConverter))]
public long? SuggestedTransactionId { get; set; }
// Parsed line items; starts as an empty list so a response without the property still yields a list.
public List<ParsedLineItem> LineItems { get; set; } = new();
}
/// <summary>
/// One line item inside <see cref="ParsedReceiptData"/>; each is converted to a ReceiptLineItem
/// when the parse result is applied.
/// </summary>
public class ParsedLineItem
{
// Item text; defaults to an empty string when the response omits it.
public string Description { get; set; } = "";
// Product code; stored in the line item's Sku field.
public string? Upc { get; set; }
public decimal? Quantity { get; set; }
public decimal? UnitPrice { get; set; }
// Line amount; non-nullable, so it is 0 when the response omits it.
public decimal LineTotal { get; set; }
public string? Category { get; set; }
// Copied to the stored line item's Voided flag.
public bool Voided { get; set; }
}
/// <summary>
/// Outcome of a receipt parse: a success flag plus a human-readable message.
/// Instances are created through <see cref="Success"/> and <see cref="Failure"/>.
/// </summary>
public class ReceiptParseResult
{
    /// <summary>True when the parse completed successfully.</summary>
    public bool IsSuccess { get; init; }

    /// <summary>Description of the outcome (summary on success, reason on failure).</summary>
    public string? Message { get; init; }

    /// <summary>Creates a successful result carrying <paramref name="message"/>.</summary>
    public static ReceiptParseResult Success(string message)
    {
        return new ReceiptParseResult { IsSuccess = true, Message = message };
    }

    /// <summary>Creates a failed result carrying <paramref name="message"/>.</summary>
    public static ReceiptParseResult Failure(string message)
    {
        return new ReceiptParseResult { IsSuccess = false, Message = message };
    }
}
/// <summary>
/// Lenient converter for <c>suggestedTransactionId</c>. Accepts a JSON number, null, or a string
/// ("null", "N/A", "none", "123") and never throws for a value it cannot interpret: anything that is
/// not an integer within the <see cref="long"/> range becomes null.
/// </summary>
public class NullableLongConverter : JsonConverter<long?>
{
    /// <summary>
    /// Reads the current token as a nullable long.
    /// </summary>
    /// <returns>
    /// The integer value for integral numbers (including forms such as <c>123.0</c>) and numeric
    /// strings; null for JSON null, placeholder strings, non-integral or out-of-range numbers, and
    /// any other token type (objects and arrays are skipped in full).
    /// </returns>
    public override long? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        switch (reader.TokenType)
        {
            case JsonTokenType.Number:
                if (reader.TryGetInt64(out var integer))
                    return integer;

                // GetInt64 would throw on "123.0" or "1.5". Accept the value when it is integral
                // and fits in a long; otherwise treat it as "no suggestion".
                if (reader.TryGetDecimal(out var number)
                    && number == decimal.Truncate(number)
                    && number >= long.MinValue
                    && number <= long.MaxValue)
                {
                    return (long)number;
                }

                return null;

            case JsonTokenType.String:
                var str = reader.GetString();
                if (string.IsNullOrWhiteSpace(str) ||
                    str.Equals("null", StringComparison.OrdinalIgnoreCase) ||
                    str.Equals("N/A", StringComparison.OrdinalIgnoreCase) ||
                    str.Equals("none", StringComparison.OrdinalIgnoreCase))
                {
                    return null;
                }

                // Machine-produced text: parse with the invariant culture, not the server's culture.
                return long.TryParse(
                    str,
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out var val)
                    ? val
                    : null;

            case JsonTokenType.Null:
                return null;

            default:
                // Objects/arrays must be consumed completely so the reader ends on their closing token.
                reader.Skip();
                return null;
        }
    }

    /// <summary>
    /// Writes the value as a JSON number, or JSON null when it has no value.
    /// </summary>
    public override void Write(Utf8JsonWriter writer, long? value, JsonSerializerOptions options)
    {
        if (value.HasValue)
            writer.WriteNumberValue(value.Value);
        else
            writer.WriteNullValue();
    }
}
}