Files
MoneyMap/MoneyMap.Core/Services/ReceiptAutoMapper.cs
T
2026-04-20 18:18:20 -04:00

496 lines
19 KiB
C#

using Microsoft.EntityFrameworkCore;
using MoneyMap.Data;
using MoneyMap.Models;
using System.Text;
using System.Text.Json;
namespace MoneyMap.Services
{
/// <summary>
/// Links receipts to bank transactions by scoring candidate transactions
/// against the receipt's merchant, dates and total.
/// </summary>
public interface IReceiptAutoMapper
{
/// <summary>
/// Attempts to map a single receipt to a transaction.
/// </summary>
/// <param name="receiptId">Identifier of the receipt to map.</param>
/// <returns>A result whose <c>Status</c> describes the outcome of the attempt.</returns>
Task<ReceiptAutoMapResult> AutoMapReceiptAsync(long receiptId);
/// <summary>
/// Runs the auto-mapping for every receipt that has no transaction yet and
/// has at least one of merchant, receipt date or total set.
/// </summary>
/// <returns>Counters summarising the outcomes.</returns>
Task<BulkAutoMapResult> AutoMapUnmappedReceiptsAsync();
/// <summary>
/// Returns the scored candidate transactions for a receipt without mapping anything.
/// </summary>
/// <param name="receiptId">Identifier of the receipt to score candidates for.</param>
/// <returns>Candidates ordered by descending score; empty when the receipt is not found.</returns>
Task<List<ScoredCandidate>> GetScoredCandidatesAsync(long receiptId);
}
public class ReceiptAutoMapper : IReceiptAutoMapper
{
// Data context used to query receipts and transactions.
private readonly MoneyMapContext _db;
// Performs the actual receipt-to-transaction link once a match has been chosen.
private readonly IReceiptManager _receiptManager;
// Client that receives the text prompt for medium-confidence matches.
private readonly LlamaCppVisionClient _llmClient;
private readonly ILogger<ReceiptAutoMapper> _logger;
// Confidence thresholds, compared against the best candidate's score (0.0 - 1.0)
private const double AutoMapThreshold = 0.85; // Auto-map if score >= 85%
private const double LlmReviewThreshold = 0.50; // Use LLM if score is >= 50% and below 85%
/// <summary>
/// Creates the auto-mapper from its injected collaborators.
/// </summary>
/// <param name="db">Data context used to query receipts and transactions.</param>
/// <param name="receiptManager">Service that links a receipt to a transaction.</param>
/// <param name="llmClient">Client used to send the matching prompt.</param>
/// <param name="logger">Logger for mapping decisions and LLM failures.</param>
public ReceiptAutoMapper(
    MoneyMapContext db,
    IReceiptManager receiptManager,
    LlamaCppVisionClient llmClient,
    ILogger<ReceiptAutoMapper> logger)
{
    (_db, _receiptManager, _llmClient, _logger) = (db, receiptManager, llmClient, logger);
}
/// <summary>
/// Attempts to link one receipt to a bank transaction.
/// </summary>
/// <remarks>
/// The best candidate's score selects the path: at or above <c>AutoMapThreshold</c>
/// the receipt is linked directly; at or above <c>LlmReviewThreshold</c> the top five
/// candidates are handed to the LLM, which must answer with a confidence of at least 0.7
/// to trigger a link; otherwise the candidates are only reported back.
/// </remarks>
/// <param name="receiptId">Identifier of the receipt to map.</param>
/// <returns>A result describing whether the receipt was linked, skipped or left for review.</returns>
public async Task<ReceiptAutoMapResult> AutoMapReceiptAsync(long receiptId)
{
    var receipt = await _db.Receipts
        .Include(r => r.Transaction)
        .FirstOrDefaultAsync(r => r.Id == receiptId);

    if (receipt is null)
    {
        return ReceiptAutoMapResult.Failure("Receipt not found.");
    }

    if (receipt.TransactionId is { } linkedTransactionId)
    {
        return ReceiptAutoMapResult.AlreadyMapped(linkedTransactionId);
    }

    // A receipt with no merchant, no date and no total has nothing to match on.
    var hasAnyParsedField = !string.IsNullOrWhiteSpace(receipt.Merchant)
        || receipt.ReceiptDate.HasValue
        || receipt.Total.HasValue;
    if (!hasAnyParsedField)
    {
        return ReceiptAutoMapResult.NotParsed();
    }

    var ranked = await FindAndScoreCandidatesAsync(receipt);
    if (ranked.Count == 0)
    {
        return ReceiptAutoMapResult.NoMatch();
    }

    // The list is ordered by descending score, so the first entry is the best one.
    var top = ranked[0];

    // High confidence - link without consulting the LLM.
    if (top.Score >= AutoMapThreshold)
    {
        _logger.LogInformation(
            "Auto-mapping receipt {ReceiptId} to transaction {TransactionId} with score {Score:P0}",
            receiptId, top.Transaction.Id, top.Score);
        return await LinkAsync(top.Transaction.Id);
    }

    // Low confidence - report the candidates (if there is more than one) and stop.
    var worthLlmReview = top.Score >= LlmReviewThreshold;
    if (!worthLlmReview)
    {
        return ranked.Count > 1
            ? ReceiptAutoMapResult.WithMultipleMatches(
                ranked.Take(5).Select(c => c.Transaction).ToList())
            : ReceiptAutoMapResult.NoMatch();
    }

    // Medium confidence - let the LLM choose among the top five.
    var shortlist = ranked.Take(5).ToList();
    var verdict = await GetLlmMatchDecisionAsync(receipt, shortlist);
    if (verdict is { Confidence: >= 0.7 })
    {
        _logger.LogInformation(
            "LLM matched receipt {ReceiptId} to transaction {TransactionId} with confidence {Confidence:P0}",
            receiptId, verdict.TransactionId, verdict.Confidence);
        return await LinkAsync(verdict.TransactionId);
    }

    // LLM unavailable or unsure - hand the shortlist back for manual review.
    return ReceiptAutoMapResult.WithMultipleMatches(
        shortlist.Select(c => c.Transaction).ToList());

    // Performs the link and converts the boolean outcome into a result object.
    async Task<ReceiptAutoMapResult> LinkAsync(long transactionId)
    {
        var linked = await _receiptManager.MapReceiptToTransactionAsync(receiptId, transactionId);
        return linked
            ? ReceiptAutoMapResult.Success(transactionId)
            : ReceiptAutoMapResult.Failure("Failed to map receipt to transaction.");
    }
}
/// <summary>
/// Runs <c>AutoMapReceiptAsync</c> for every receipt that has no transaction
/// and has at least one of merchant, receipt date or total set.
/// </summary>
/// <returns>
/// Counters for the run. Only the Success, MultipleMatches and NoMatch outcomes are
/// tallied individually; every loaded receipt counts towards <c>TotalProcessed</c>.
/// </returns>
public async Task<BulkAutoMapResult> AutoMapUnmappedReceiptsAsync()
{
    var pending = await _db.Receipts
        .Where(r => r.TransactionId == null)
        .Where(r => r.Merchant != null || r.ReceiptDate != null || r.Total != null)
        .ToListAsync();

    var summary = new BulkAutoMapResult { TotalProcessed = pending.Count };

    // Receipts are processed one at a time, each awaited before the next starts.
    foreach (var item in pending)
    {
        var outcome = await AutoMapReceiptAsync(item.Id);
        switch (outcome.Status)
        {
            case AutoMapStatus.Success:
                summary.MappedCount++;
                break;
            case AutoMapStatus.MultipleMatches:
                summary.MultipleMatchesCount++;
                break;
            case AutoMapStatus.NoMatch:
                summary.NoMatchCount++;
                break;
        }
    }

    return summary;
}
/// <summary>
/// Scores the candidate transactions for a receipt without mapping anything.
/// </summary>
/// <param name="receiptId">Identifier of the receipt to score candidates for.</param>
/// <returns>Candidates ordered by descending score, or an empty list when the receipt does not exist.</returns>
public async Task<List<ScoredCandidate>> GetScoredCandidatesAsync(long receiptId)
{
    var receipt = await _db.Receipts.FirstOrDefaultAsync(r => r.Id == receiptId);

    return receipt is null
        ? new List<ScoredCandidate>()
        : await FindAndScoreCandidatesAsync(receipt);
}
/// <summary>
/// Loads the transactions inside the receipt's date window, drops those already linked
/// to another receipt, and scores the rest.
/// </summary>
/// <param name="receipt">Receipt to find candidates for.</param>
/// <returns>
/// Candidates with a score above 0.1, ordered by descending score. Empty when the receipt
/// has neither a receipt date nor a due date, or when nothing falls inside the window.
/// </returns>
private async Task<List<ScoredCandidate>> FindAndScoreCandidatesAsync(Receipt receipt)
{
    DateTime? receiptDate = receipt.ReceiptDate;
    DateTime? dueDate = receipt.DueDate;

    // No date info - can't match reliably.
    if (!receiptDate.HasValue && !dueDate.HasValue)
    {
        return new List<ScoredCandidate>();
    }

    // Window: from the receipt date (or the due date when there is no receipt date)
    // up to seven days after the due date (or the receipt date when there is no due date).
    // Both bounds are reduced to calendar days: if the receipt date carries a time-of-day,
    // a raw comparison would exclude a same-day transaction stored at midnight.
    var windowStart = (receiptDate ?? dueDate!.Value).Date;
    var windowEndExclusive = (dueDate ?? receiptDate!.Value).Date.AddDays(8);

    var candidates = await _db.Transactions
        .Include(t => t.Card)
        .Include(t => t.Account)
        .Include(t => t.Merchant)
        .Where(t => t.Date >= windowStart && t.Date < windowEndExclusive)
        .ToListAsync();

    // Nothing in range - skip the receipts query entirely.
    if (candidates.Count == 0)
    {
        return new List<ScoredCandidate>();
    }

    // Transactions already linked to some other receipt are not eligible.
    // A hash set keeps the membership test below O(1) per candidate.
    var alreadyLinkedIds = (await _db.Receipts
            .Where(r => r.TransactionId != null && r.Id != receipt.Id)
            .Select(r => r.TransactionId!.Value)
            .Distinct()
            .ToListAsync())
        .ToHashSet();

    return candidates
        .Where(t => !alreadyLinkedIds.Contains(t.Id))
        .Select(t => new ScoredCandidate
        {
            Transaction = t,
            Score = CalculateMatchScore(receipt, t)
        })
        .Where(s => s.Score > 0.1) // Filter out very poor matches
        .OrderByDescending(s => s.Score)
        .ToList();
}
/// <summary>
/// Scores how well a transaction matches a receipt, on a 0.0 - 1.0 scale.
/// </summary>
/// <remarks>
/// Each field present on the receipt contributes a weighted component:
/// total 0.40, receipt date 0.25, due date 0.10, merchant 0.35. When the weights of the
/// present fields sum to less than 1.0 the score is divided by that sum; when all four
/// are present the weights sum to 1.10 and the result is capped at 1.0 instead.
/// Date distances are measured in whole calendar days so that a time-of-day on either
/// side cannot turn a same-day match into a near miss.
/// </remarks>
/// <param name="receipt">Receipt supplying the expected total, dates and merchant.</param>
/// <param name="transaction">Candidate transaction being scored.</param>
/// <returns>The match score, never greater than 1.0.</returns>
private double CalculateMatchScore(Receipt receipt, Transaction transaction)
{
    double score = 0;
    double totalWeight = 0;

    // Amount matching (weight: 40%). Signs are ignored on both sides.
    if (receipt.Total.HasValue)
    {
        const double amountWeight = 0.40;
        totalWeight += amountWeight;

        var receiptAmount = Math.Abs(receipt.Total.Value);
        var transactionAmount = Math.Abs(transaction.Amount);

        if (receiptAmount == 0)
        {
            // A relative difference is undefined for a zero total;
            // only a zero-amount transaction can be considered a match.
            if (transactionAmount == 0)
            {
                score += amountWeight;
            }
        }
        else
        {
            var relativeDifference = (double)(Math.Abs(receiptAmount - transactionAmount) / receiptAmount);
            var amountFactor = relativeDifference switch
            {
                0.0 => 1.0,
                <= 0.01 => 0.95, // Within 1%
                <= 0.05 => 0.80, // Within 5%
                <= 0.10 => 0.60, // Within 10%
                <= 0.20 => 0.30, // Within 20%
                _ => 0.0         // Beyond 20% = 0 points
            };
            score += amountWeight * amountFactor;
        }
    }

    // Date matching (weight: 25%)
    if (receipt.ReceiptDate.HasValue)
    {
        const double dateWeight = 0.25;
        totalWeight += dateWeight;

        var daysApart = Math.Abs((transaction.Date.Date - receipt.ReceiptDate.Value.Date).Days);
        var dateFactor = daysApart switch
        {
            0 => 1.0,
            <= 1 => 0.90,
            <= 3 => 0.70,
            <= 5 => 0.50,
            <= 7 => 0.30,
            _ => 0.0 // Beyond 7 days = 0 points
        };
        score += dateWeight * dateFactor;
    }

    // Due date matching for bills (weight: 10%)
    if (receipt.DueDate.HasValue)
    {
        const double dueDateWeight = 0.10;
        totalWeight += dueDateWeight;

        var daysFromDue = Math.Abs((transaction.Date.Date - receipt.DueDate.Value.Date).Days);
        var dueFactor = daysFromDue switch
        {
            <= 1 => 1.0,
            <= 3 => 0.70,
            <= 5 => 0.40,
            _ => 0.0
        };
        score += dueDateWeight * dueFactor;
    }

    // Merchant/Name matching (weight: 35%)
    if (!string.IsNullOrWhiteSpace(receipt.Merchant))
    {
        const double merchantWeight = 0.35;
        totalWeight += merchantWeight;

        var merchantScore = CalculateMerchantMatchScore(
            receipt.Merchant,
            transaction.Merchant?.Name,
            transaction.Name);
        score += merchantWeight * merchantScore;
    }

    // Normalize when some data points were missing so that the remaining ones
    // can still reach the full scale.
    if (totalWeight > 0 && totalWeight < 1.0)
    {
        score = score / totalWeight;
    }

    return Math.Min(score, 1.0);
}
/// <summary>
/// Scores how well the receipt's merchant text matches a transaction's merchant name
/// or its raw transaction name.
/// </summary>
/// <remarks>
/// Comparison is case-insensitive (invariant lower-casing) and ignores surrounding whitespace.
/// Exact equality scores 1.0, containment in either direction scores 0.90 against the
/// merchant name or 0.85 against the transaction name, and otherwise the better of the two
/// word-overlap ratios is returned. Missing or blank transaction texts never count as a
/// containment match: every string contains the empty string, so without that guard any
/// transaction lacking a merchant would score 0.90.
/// </remarks>
/// <param name="receiptMerchant">Merchant text taken from the receipt.</param>
/// <param name="transactionMerchant">Name of the transaction's merchant, if any.</param>
/// <param name="transactionName">Name of the transaction itself, if any.</param>
/// <returns>A similarity score; 0 when the receipt text is blank.</returns>
private double CalculateMerchantMatchScore(string receiptMerchant, string? transactionMerchant, string? transactionName)
{
    var receiptLower = receiptMerchant.ToLowerInvariant().Trim();
    if (receiptLower.Length == 0)
    {
        // Nothing to compare against.
        return 0;
    }

    var merchantLower = transactionMerchant?.ToLowerInvariant().Trim() ?? "";
    var nameLower = transactionName?.ToLowerInvariant().Trim() ?? "";

    // Exact match (receiptLower is non-empty here, so an empty side cannot match).
    if (receiptLower == merchantLower || receiptLower == nameLower)
    {
        return 1.0;
    }

    // Contains match - only meaningful when the transaction side has text.
    if (merchantLower.Length > 0
        && (merchantLower.Contains(receiptLower) || receiptLower.Contains(merchantLower)))
    {
        return 0.90;
    }

    if (nameLower.Length > 0
        && (nameLower.Contains(receiptLower) || receiptLower.Contains(nameLower)))
    {
        return 0.85;
    }

    // Word-based matching
    var receiptWords = ExtractWords(receiptLower);
    var merchantMatchRatio = CalculateWordMatchRatio(receiptWords, ExtractWords(merchantLower));
    var nameMatchRatio = CalculateWordMatchRatio(receiptWords, ExtractWords(nameLower));

    return Math.Max(merchantMatchRatio, nameMatchRatio);
}
/// <summary>
/// Splits text into a set of distinct words.
/// </summary>
/// <param name="text">Text to split; the caller is expected to have lower-cased it already.</param>
/// <returns>
/// The distinct tokens longer than one character, split on space and the
/// punctuation characters <c>- _ . , # / \ *</c>.
/// </returns>
private static HashSet<string> ExtractWords(string text)
{
    char[] separators = { ' ', '-', '_', '.', ',', '#', '/', '\\', '*' };

    var words = new HashSet<string>();
    foreach (var token in text.Split(separators, StringSplitOptions.RemoveEmptyEntries))
    {
        // Skip single chars
        if (token.Length > 1)
        {
            words.Add(token);
        }
    }

    return words;
}
/// <summary>
/// Computes the fraction of words in the smaller set that have a counterpart in the larger set.
/// </summary>
/// <remarks>
/// Two words are counterparts when they are equal or one contains the other.
/// The smaller set is the one iterated, so the result is always between 0.0 and 1.0;
/// counting over the larger set while dividing by the smaller size could exceed 1.0.
/// </remarks>
/// <param name="words1">First word set.</param>
/// <param name="words2">Second word set.</param>
/// <returns>The match ratio, or 0 when either set is empty.</returns>
private static double CalculateWordMatchRatio(HashSet<string> words1, HashSet<string> words2)
{
    if (words1.Count == 0 || words2.Count == 0)
    {
        return 0;
    }

    // On a tie words1 is treated as the smaller set.
    var (smaller, larger) = words1.Count <= words2.Count
        ? (words1, words2)
        : (words2, words1);

    var matches = smaller.Count(word =>
        larger.Any(other => word == other || word.Contains(other) || other.Contains(word)));

    return (double)matches / smaller.Count;
}
/// <summary>
/// Asks the LLM to choose the best transaction for a receipt among the given candidates.
/// </summary>
/// <param name="receipt">Receipt being matched.</param>
/// <param name="candidates">Candidates presented to the LLM, in the order they are numbered in the prompt.</param>
/// <returns>
/// The parsed decision, or <c>null</c> when the call is unsuccessful, the reply cannot be
/// interpreted, or any exception is raised (the exception is logged, not rethrown).
/// </returns>
private async Task<LlmMatchResult?> GetLlmMatchDecisionAsync(Receipt receipt, List<ScoredCandidate> candidates)
{
    try
    {
        var prompt = BuildLlmPrompt(receipt, candidates);
        _logger.LogInformation("Sending receipt matching prompt to LLM for receipt {ReceiptId}", receipt.Id);

        var response = await _llmClient.SendTextPromptAsync(prompt);
        if (response.IsSuccess)
        {
            _logger.LogInformation("LLM response: {Content}", response.Content);
            return ParseLlmResponse(response.Content, candidates);
        }

        _logger.LogWarning("LLM matching failed: {Error}", response.ErrorMessage);
        return null;
    }
    catch (Exception ex)
    {
        // Best effort: a failing LLM must not break the mapping flow.
        _logger.LogError(ex, "Error during LLM match decision");
        return null;
    }
}
/// <summary>
/// Builds the text prompt that describes the receipt and the numbered candidate
/// transactions and asks for a JSON answer.
/// </summary>
/// <param name="receipt">Receipt whose merchant, dates and total are listed.</param>
/// <param name="candidates">Candidates to list; they are numbered starting at 1 in list order.</param>
/// <returns>The complete prompt text.</returns>
private static string BuildLlmPrompt(Receipt receipt, List<ScoredCandidate> candidates)
{
    var prompt = new StringBuilder();

    // Task description
    prompt.AppendLine("You are matching a receipt to bank transactions. Analyze and pick the best match.");
    prompt.AppendLine();

    // Receipt section - the due date line is only present for receipts that have one.
    prompt.AppendLine("RECEIPT:");
    prompt.AppendLine($" Merchant: {receipt.Merchant ?? "Unknown"}");
    prompt.AppendLine($" Date: {receipt.ReceiptDate?.ToString("yyyy-MM-dd") ?? "Unknown"}");
    if (receipt.DueDate.HasValue)
    {
        prompt.AppendLine($" Due Date: {receipt.DueDate.Value:yyyy-MM-dd}");
    }
    prompt.AppendLine($" Total: {receipt.Total?.ToString("C") ?? "Unknown"}");
    prompt.AppendLine();

    // Candidate section - the bracketed number is the index the reply refers to.
    prompt.AppendLine("CANDIDATE TRANSACTIONS:");
    var position = 0;
    foreach (var entry in candidates)
    {
        position++;
        var t = entry.Transaction;

        prompt.AppendLine($" [{position}] ID={t.Id}");
        prompt.AppendLine($" Name: {t.Name}");
        if (t.Merchant != null)
        {
            prompt.AppendLine($" Merchant: {t.Merchant.Name}");
        }
        prompt.AppendLine($" Date: {t.Date:yyyy-MM-dd}");
        prompt.AppendLine($" Amount: {t.Amount:C}");
        prompt.AppendLine($" Current Score: {entry.Score:P0}");
        prompt.AppendLine();
    }

    // Expected answer format
    string[] answerFormat =
    {
        "Respond with JSON only:",
        "{",
        " \"match_index\": <1-based index of best match, or 0 if none match>,",
        " \"confidence\": <0.0 to 1.0>,",
        " \"reason\": \"<brief explanation>\"",
        "}"
    };
    foreach (var line in answerFormat)
    {
        prompt.AppendLine(line);
    }

    return prompt.ToString();
}
/// <summary>
/// Interprets the LLM reply and resolves the chosen candidate.
/// </summary>
/// <remarks>
/// The prompt asks for JSON only, but the reply is free text. To tolerate markdown code
/// fences or prose around the object, only the span from the first '{' to the last '}'
/// is parsed. A reply that is already a bare JSON object is parsed unchanged.
/// </remarks>
/// <param name="content">Raw reply text; may be null or blank.</param>
/// <param name="candidates">Candidates in the order they were numbered in the prompt.</param>
/// <returns>
/// The chosen transaction and the reported confidence, or <c>null</c> when the reply is
/// blank, is not parseable, lacks the expected properties, or names an index outside
/// 1..<c>candidates.Count</c> (0 means "none match").
/// </returns>
private LlmMatchResult? ParseLlmResponse(string? content, List<ScoredCandidate> candidates)
{
    if (string.IsNullOrWhiteSpace(content))
    {
        return null;
    }

    var objectStart = content.IndexOf('{');
    var objectEnd = content.LastIndexOf('}');
    if (objectStart < 0 || objectEnd <= objectStart)
    {
        _logger.LogWarning("Failed to parse LLM response: {Content}", content);
        return null;
    }

    var payload = content.Substring(objectStart, objectEnd - objectStart + 1);

    try
    {
        var json = JsonSerializer.Deserialize<JsonElement>(payload);
        var matchIndex = json.GetProperty("match_index").GetInt32();
        var confidence = json.GetProperty("confidence").GetDouble();

        // match_index is 1-based; 0 is the model's way of saying that nothing matches.
        if (matchIndex <= 0 || matchIndex > candidates.Count)
        {
            return null;
        }

        return new LlmMatchResult
        {
            TransactionId = candidates[matchIndex - 1].Transaction.Id,
            Confidence = confidence
        };
    }
    catch (Exception ex)
    {
        // Malformed JSON, missing properties and wrong value kinds all end up here.
        _logger.LogWarning(ex, "Failed to parse LLM response: {Content}", content);
        return null;
    }
}
}
/// <summary>
/// A candidate transaction together with its match score for a receipt.
/// </summary>
public class ScoredCandidate
{
/// <summary>The candidate transaction.</summary>
public required Transaction Transaction { get; set; }
/// <summary>Match score; the scoring routine caps it at 1.0.</summary>
public double Score { get; set; }
}
/// <summary>
/// The LLM's decision for a receipt: which transaction it picked and how sure it is.
/// </summary>
public class LlmMatchResult
{
/// <summary>Identifier of the transaction the LLM selected.</summary>
public long TransactionId { get; set; }
/// <summary>Confidence as reported in the LLM reply (the prompt asks for 0.0 to 1.0).</summary>
public double Confidence { get; set; }
}
/// <summary>
/// Outcome of an attempt to auto-map one receipt. Instances are created through the
/// static factory methods, one per <see cref="AutoMapStatus"/> value.
/// </summary>
public class ReceiptAutoMapResult
{
    /// <summary>Which outcome this result represents.</summary>
    public AutoMapStatus Status { get; init; }

    /// <summary>The linked transaction; set for the Success and AlreadyMapped outcomes.</summary>
    public long? TransactionId { get; init; }

    /// <summary>Candidate transactions for manual review; empty unless the outcome is MultipleMatches.</summary>
    public List<Transaction> MultipleMatches { get; init; } = new List<Transaction>();

    /// <summary>Human-readable explanation, where the outcome provides one.</summary>
    public string? Message { get; init; }

    /// <summary>The receipt was linked to <paramref name="transactionId"/>.</summary>
    public static ReceiptAutoMapResult Success(long transactionId)
    {
        return new ReceiptAutoMapResult
        {
            Status = AutoMapStatus.Success,
            TransactionId = transactionId
        };
    }

    /// <summary>The receipt was already linked to <paramref name="transactionId"/>.</summary>
    public static ReceiptAutoMapResult AlreadyMapped(long transactionId)
    {
        return new ReceiptAutoMapResult
        {
            Status = AutoMapStatus.AlreadyMapped,
            TransactionId = transactionId
        };
    }

    /// <summary>No suitable transaction was found.</summary>
    public static ReceiptAutoMapResult NoMatch()
    {
        return new ReceiptAutoMapResult
        {
            Status = AutoMapStatus.NoMatch,
            Message = "No matching transaction found."
        };
    }

    /// <summary>Several plausible transactions were found and are returned for manual review.</summary>
    public static ReceiptAutoMapResult WithMultipleMatches(List<Transaction> matches)
    {
        return new ReceiptAutoMapResult
        {
            Status = AutoMapStatus.MultipleMatches,
            MultipleMatches = matches,
            Message = $"Found {matches.Count} potential matches."
        };
    }

    /// <summary>The receipt carries no parsed data to match on.</summary>
    public static ReceiptAutoMapResult NotParsed()
    {
        return new ReceiptAutoMapResult
        {
            Status = AutoMapStatus.NotParsed,
            Message = "Receipt has not been parsed yet."
        };
    }

    /// <summary>The attempt failed; <paramref name="message"/> explains why.</summary>
    public static ReceiptAutoMapResult Failure(string message)
    {
        return new ReceiptAutoMapResult
        {
            Status = AutoMapStatus.Failed,
            Message = message
        };
    }
}
/// <summary>
/// Counters produced by a bulk auto-mapping run.
/// </summary>
/// <remarks>
/// The bulk run tallies only the Success, NoMatch and MultipleMatches outcomes, so the
/// three specific counters can add up to less than <see cref="TotalProcessed"/>.
/// </remarks>
public class BulkAutoMapResult
{
/// <summary>Number of receipts the run attempted.</summary>
public int TotalProcessed { get; set; }
/// <summary>Number of receipts that were linked to a transaction.</summary>
public int MappedCount { get; set; }
/// <summary>Number of receipts for which no matching transaction was found.</summary>
public int NoMatchCount { get; set; }
/// <summary>Number of receipts left with several candidates for manual review.</summary>
public int MultipleMatchesCount { get; set; }
}
/// <summary>
/// Possible outcomes of an attempt to auto-map a receipt.
/// </summary>
public enum AutoMapStatus
{
// The receipt was linked to a transaction.
Success,
// The receipt already had a transaction before the attempt.
AlreadyMapped,
// No suitable candidate transaction was found.
NoMatch,
// Several candidates were found; none was chosen automatically.
MultipleMatches,
// The receipt has no merchant, receipt date or total to match on.
NotParsed,
// The receipt was not found, or the mapping call reported failure.
Failed
}
}