Feature: Enhanced receipt auto-mapping with weighted scoring

Replace simple matching with a weighted scoring algorithm (40% amount,
25% date, 35% merchant, plus a 10% due-date bonus for bills). Uses confidence thresholds:
- 85%+ auto-maps immediately
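- 50-85% asks the LLM to decide; it maps only if the LLM is at least 70% confident, otherwise the top candidates are returned for manual review
- Below 50% returns the top candidates for manual review (or no match when there is at most one candidate)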

Transactions can no longer match before receipt date since receipts
are issued at time of purchase.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-11 16:54:03 -05:00
parent dc56021a77
commit f3b847cc68

View File

@@ -1,6 +1,8 @@
using Microsoft.EntityFrameworkCore;
using MoneyMap.Data;
using MoneyMap.Models;
using System.Text;
using System.Text.Json;
namespace MoneyMap.Services
{
@@ -8,17 +10,30 @@ namespace MoneyMap.Services
{
Task<ReceiptAutoMapResult> AutoMapReceiptAsync(long receiptId);
Task<BulkAutoMapResult> AutoMapUnmappedReceiptsAsync();
Task<List<ScoredCandidate>> GetScoredCandidatesAsync(long receiptId);
}
public class ReceiptAutoMapper : IReceiptAutoMapper
{
private readonly MoneyMapContext _db;
private readonly IReceiptManager _receiptManager;
private readonly LlamaCppVisionClient _llmClient;
private readonly ILogger<ReceiptAutoMapper> _logger;
public ReceiptAutoMapper(MoneyMapContext db, IReceiptManager receiptManager)
// Confidence thresholds
private const double AutoMapThreshold = 0.85; // Auto-map if score >= 85%
private const double LlmReviewThreshold = 0.50; // Use LLM if score between 50-85%
/// <summary>
/// Creates the auto-mapper and stores each collaborator in its matching private field.
/// </summary>
/// <param name="db">Database context kept in <c>_db</c>.</param>
/// <param name="receiptManager">Receipt manager kept in <c>_receiptManager</c>.</param>
/// <param name="llmClient">LLM client kept in <c>_llmClient</c>.</param>
/// <param name="logger">Logger kept in <c>_logger</c>.</param>
public ReceiptAutoMapper(
    MoneyMapContext db,
    IReceiptManager receiptManager,
    LlamaCppVisionClient llmClient,
    ILogger<ReceiptAutoMapper> logger)
    => (_db, _receiptManager, _llmClient, _logger) = (db, receiptManager, llmClient, logger);
public async Task<ReceiptAutoMapResult> AutoMapReceiptAsync(long receiptId)
@@ -30,38 +45,70 @@ namespace MoneyMap.Services
if (receipt == null)
return ReceiptAutoMapResult.Failure("Receipt not found.");
// If already mapped, skip
if (receipt.TransactionId.HasValue)
return ReceiptAutoMapResult.AlreadyMapped(receipt.TransactionId.Value);
// If receipt has not been parsed (no merchant, date, or total), skip
if (string.IsNullOrWhiteSpace(receipt.Merchant) && !receipt.ReceiptDate.HasValue && !receipt.Total.HasValue)
return ReceiptAutoMapResult.NotParsed();
// Find matching transactions based on parsed data
var candidateTransactions = await FindMatchingTransactionsAsync(receipt);
var scoredCandidates = await FindAndScoreCandidatesAsync(receipt);
if (candidateTransactions.Count == 0)
if (scoredCandidates.Count == 0)
return ReceiptAutoMapResult.NoMatch();
if (candidateTransactions.Count > 1)
return ReceiptAutoMapResult.WithMultipleMatches(candidateTransactions);
var bestMatch = scoredCandidates[0];
// Single match found - auto-map it
var transaction = candidateTransactions[0];
var success = await _receiptManager.MapReceiptToTransactionAsync(receiptId, transaction.Id);
// High confidence - auto-map directly
if (bestMatch.Score >= AutoMapThreshold)
{
_logger.LogInformation(
"Auto-mapping receipt {ReceiptId} to transaction {TransactionId} with score {Score:P0}",
receiptId, bestMatch.Transaction.Id, bestMatch.Score);
if (success)
return ReceiptAutoMapResult.Success(transaction.Id);
else
return ReceiptAutoMapResult.Failure("Failed to map receipt to transaction.");
var success = await _receiptManager.MapReceiptToTransactionAsync(receiptId, bestMatch.Transaction.Id);
return success
? ReceiptAutoMapResult.Success(bestMatch.Transaction.Id)
: ReceiptAutoMapResult.Failure("Failed to map receipt to transaction.");
}
// Medium confidence - use LLM to decide
if (bestMatch.Score >= LlmReviewThreshold)
{
var topCandidates = scoredCandidates.Take(5).ToList();
var llmResult = await GetLlmMatchDecisionAsync(receipt, topCandidates);
if (llmResult != null && llmResult.Confidence >= 0.7)
{
_logger.LogInformation(
"LLM matched receipt {ReceiptId} to transaction {TransactionId} with confidence {Confidence:P0}",
receiptId, llmResult.TransactionId, llmResult.Confidence);
var success = await _receiptManager.MapReceiptToTransactionAsync(receiptId, llmResult.TransactionId);
return success
? ReceiptAutoMapResult.Success(llmResult.TransactionId)
: ReceiptAutoMapResult.Failure("Failed to map receipt to transaction.");
}
// LLM uncertain - return multiple matches for manual review
return ReceiptAutoMapResult.WithMultipleMatches(
topCandidates.Select(c => c.Transaction).ToList());
}
// Low confidence - no good matches
if (scoredCandidates.Count > 1)
{
return ReceiptAutoMapResult.WithMultipleMatches(
scoredCandidates.Take(5).Select(c => c.Transaction).ToList());
}
return ReceiptAutoMapResult.NoMatch();
}
public async Task<BulkAutoMapResult> AutoMapUnmappedReceiptsAsync()
{
var unmappedReceipts = await _db.Receipts
.Where(r => r.TransactionId == null)
.Where(r => r.Merchant != null || r.ReceiptDate != null || r.Total != null) // Only parsed receipts
.Where(r => r.Merchant != null || r.ReceiptDate != null || r.Total != null)
.ToListAsync();
var result = new BulkAutoMapResult();
@@ -71,105 +118,60 @@ namespace MoneyMap.Services
var mapResult = await AutoMapReceiptAsync(receipt.Id);
if (mapResult.Status == AutoMapStatus.Success)
{
result.MappedCount++;
}
else if (mapResult.Status == AutoMapStatus.MultipleMatches)
{
result.MultipleMatchesCount++;
}
else if (mapResult.Status == AutoMapStatus.NoMatch)
{
result.NoMatchCount++;
}
}
result.TotalProcessed = unmappedReceipts.Count;
return result;
}
private async Task<List<Transaction>> FindMatchingTransactionsAsync(Receipt receipt)
/// <summary>
/// Looks up a receipt by id and returns its scored transaction candidates.
/// </summary>
/// <param name="receiptId">Id of the receipt to look up.</param>
/// <returns>
/// The list produced by <c>FindAndScoreCandidatesAsync</c> for the receipt,
/// or an empty list when no receipt with that id exists.
/// </returns>
public async Task<List<ScoredCandidate>> GetScoredCandidatesAsync(long receiptId)
{
    var found = await _db.Receipts.FirstOrDefaultAsync(r => r.Id == receiptId);

    return found == null
        ? new List<ScoredCandidate>()
        : await FindAndScoreCandidatesAsync(found);
}
private async Task<List<ScoredCandidate>> FindAndScoreCandidatesAsync(Receipt receipt)
{
// Get transactions in a reasonable date range
var query = _db.Transactions
.Include(t => t.Card)
.Include(t => t.Account)
.Include(t => t.Merchant)
.AsQueryable();
// Start with date range filter
if (receipt.ReceiptDate.HasValue && receipt.DueDate.HasValue)
// Date range: use receipt date or due date
// Transactions can't occur before the receipt date (you get a receipt when you buy something)
DateTime? targetDate = receipt.ReceiptDate;
DateTime? dueDate = receipt.DueDate;
if (targetDate.HasValue || dueDate.HasValue)
{
// For bills with due dates: use range from bill date to due date + 5 days
// (to account for auto-pay processing delays, weekends, etc.)
var minDate = receipt.ReceiptDate.Value;
var maxDate = receipt.DueDate.Value.AddDays(5);
query = query.Where(t => t.Date >= minDate && t.Date <= maxDate);
}
else if (receipt.ReceiptDate.HasValue)
{
// For regular receipts: allow +/- 3 days for transaction date to account for processing delays
var minDate = receipt.ReceiptDate.Value.AddDays(-3);
var maxDate = receipt.ReceiptDate.Value.AddDays(3);
// Min date is the receipt date - transactions can't precede the receipt
var minDate = targetDate ?? dueDate!.Value;
var maxDate = (dueDate ?? targetDate!.Value).AddDays(7);
query = query.Where(t => t.Date >= minDate && t.Date <= maxDate);
}
else
{
// If no receipt date, can't narrow down effectively
return new List<Transaction>();
// No date info - can't match reliably
return new List<ScoredCandidate>();
}
// Get candidates
var candidates = await query.ToListAsync();
// Sort by merchant/name relevance using word matching if merchant available
if (!string.IsNullOrWhiteSpace(receipt.Merchant))
{
var receiptWords = receipt.Merchant.ToLower().Split(new[] { ' ', '-', '_', '.' }, StringSplitOptions.RemoveEmptyEntries);
candidates = candidates
.OrderByDescending(t =>
{
var merchantName = t.Merchant?.Name?.ToLower() ?? "";
var transactionName = t.Name?.ToLower() ?? "";
// Exact match
if (merchantName == receipt.Merchant.ToLower() || transactionName == receipt.Merchant.ToLower())
return 1000;
// Count matching words
var merchantWords = merchantName.Split(new[] { ' ', '-', '_', '.' }, StringSplitOptions.RemoveEmptyEntries);
var transactionWords = transactionName.Split(new[] { ' ', '-', '_', '.' }, StringSplitOptions.RemoveEmptyEntries);
var merchantMatches = receiptWords.Count(rw => merchantWords.Any(mw => mw.Contains(rw) || rw.Contains(mw)));
var transactionMatches = receiptWords.Count(rw => transactionWords.Any(tw => tw.Contains(rw) || rw.Contains(tw)));
// Return the higher match count
return Math.Max(merchantMatches * 10, transactionMatches * 10);
})
.ThenByDescending(t => t.Date)
.ToList();
}
// If we have a total amount, filter by amount match (±10% tolerance)
if (receipt.Total.HasValue)
{
var receiptTotal = Math.Abs(receipt.Total.Value);
var tolerance = receiptTotal * 0.10m; // 10% tolerance
var minAmount = receiptTotal - tolerance;
var maxAmount = receiptTotal + tolerance;
candidates = candidates
.Where(t =>
{
var transactionAmount = Math.Abs(t.Amount);
return transactionAmount >= minAmount && transactionAmount <= maxAmount;
})
.ToList();
}
// Exclude transactions that already have receipts
var transactionsWithReceipts = await _db.Receipts
.Where(r => r.TransactionId != null)
.Where(r => r.TransactionId != null && r.Id != receipt.Id)
.Select(r => r.TransactionId!.Value)
.Distinct()
.ToListAsync();
@@ -178,8 +180,273 @@ namespace MoneyMap.Services
.Where(t => !transactionsWithReceipts.Contains(t.Id))
.ToList();
return candidates;
// Score each candidate
var scored = candidates
.Select(t => new ScoredCandidate
{
Transaction = t,
Score = CalculateMatchScore(receipt, t)
})
.Where(s => s.Score > 0.1) // Filter out very poor matches
.OrderByDescending(s => s.Score)
.ToList();
return scored;
}
/// <summary>
/// Scores how well <paramref name="transaction"/> matches <paramref name="receipt"/>.
/// </summary>
/// <remarks>
/// Only signals present on the receipt take part: total (weight 0.40), receipt date (0.25),
/// due date (0.10) and merchant (0.35). Each signal earns a tiered fraction of its weight.
/// When the weights of the participating signals sum to less than 1.0 the earned score is
/// divided by that sum; when they sum to 1.0 or more it is left as-is, so the due-date
/// weight then acts as a bonus on top of the other three. The result is capped at 1.0.
/// </remarks>
/// <param name="receipt">Receipt whose parsed fields drive the scoring.</param>
/// <param name="transaction">Candidate transaction being scored.</param>
/// <returns>A score no greater than 1.0.</returns>
private double CalculateMatchScore(Receipt receipt, Transaction transaction)
{
    const double amountWeight = 0.40;
    const double dateWeight = 0.25;
    const double dueDateWeight = 0.10;
    const double merchantWeight = 0.35;

    double earned = 0;
    double available = 0;

    // Amount: compare absolute values so the sign convention of the ledger does not matter.
    if (receipt.Total.HasValue)
    {
        available += amountWeight;

        var receiptAbs = Math.Abs(receipt.Total.Value);
        var transactionAbs = Math.Abs(transaction.Amount);

        // A zero receipt total cannot yield a relative difference; the weight still counts.
        if (receiptAbs > 0)
        {
            var relativeGap = (double)(Math.Abs(receiptAbs - transactionAbs) / receiptAbs);
            earned += amountWeight * AmountCredit(relativeGap);
        }
    }

    // Receipt date: distance in days between the transaction and the receipt.
    if (receipt.ReceiptDate.HasValue)
    {
        available += dateWeight;

        var gapDays = Math.Abs((transaction.Date - receipt.ReceiptDate.Value).TotalDays);
        earned += dateWeight * ReceiptDateCredit(gapDays);
    }

    // Due date (bills): distance in days between the transaction and the due date.
    if (receipt.DueDate.HasValue)
    {
        available += dueDateWeight;

        var gapDays = Math.Abs((transaction.Date - receipt.DueDate.Value).TotalDays);
        earned += dueDateWeight * DueDateCredit(gapDays);
    }

    // Merchant: delegate the text comparison to the merchant scorer.
    if (!string.IsNullOrWhiteSpace(receipt.Merchant))
    {
        available += merchantWeight;

        var textScore = CalculateMerchantMatchScore(
            receipt.Merchant,
            transaction.Merchant?.Name,
            transaction.Name);
        earned += merchantWeight * textScore;
    }

    // Rescale only when some signals were missing (weights sum below 1.0).
    if (available > 0 && available < 1.0)
        earned /= available;

    return Math.Min(earned, 1.0);

    // Fraction of the amount weight earned for a given relative difference.
    static double AmountCredit(double relativeGap)
    {
        if (relativeGap == 0) return 1.0;
        if (relativeGap <= 0.01) return 0.95; // within 1%
        if (relativeGap <= 0.05) return 0.80; // within 5%
        if (relativeGap <= 0.10) return 0.60; // within 10%
        if (relativeGap <= 0.20) return 0.30; // within 20%
        return 0.0;                           // beyond 20%
    }

    // Fraction of the date weight earned for a given distance from the receipt date.
    static double ReceiptDateCredit(double gapDays)
    {
        if (gapDays == 0) return 1.0;
        if (gapDays <= 1) return 0.90;
        if (gapDays <= 3) return 0.70;
        if (gapDays <= 5) return 0.50;
        if (gapDays <= 7) return 0.30;
        return 0.0; // beyond 7 days
    }

    // Fraction of the due-date weight earned for a given distance from the due date.
    static double DueDateCredit(double gapDays)
    {
        if (gapDays <= 1) return 1.0;
        if (gapDays <= 3) return 0.70;
        if (gapDays <= 5) return 0.40;
        return 0.0;
    }
}
/// <summary>
/// Scores how closely the receipt's merchant text matches a transaction's merchant name
/// or transaction name.
/// </summary>
/// <remarks>
/// Comparison is done on lower-cased, trimmed text. An exact match scores 1.0, a substring
/// match against the merchant name 0.90, a substring match against the transaction name 0.85;
/// otherwise the better of the two word-overlap ratios is returned.
/// A missing or blank merchant/name never counts as a match: <c>string.Contains("")</c> is
/// always true, so empty comparands are excluded from the substring checks explicitly.
/// </remarks>
/// <param name="receiptMerchant">Merchant text taken from the receipt.</param>
/// <param name="transactionMerchant">Name of the transaction's merchant, if any.</param>
/// <param name="transactionName">Name of the transaction, if any.</param>
/// <returns>The merchant match score; 0 when there is nothing to compare.</returns>
private double CalculateMerchantMatchScore(string receiptMerchant, string? transactionMerchant, string? transactionName)
{
    var receiptLower = receiptMerchant.ToLowerInvariant().Trim();
    var merchantLower = transactionMerchant?.ToLowerInvariant().Trim() ?? "";
    var nameLower = transactionName?.ToLowerInvariant().Trim() ?? "";

    // Blank receipt text would "match" everything below; treat it as no evidence.
    if (receiptLower.Length == 0)
        return 0;

    // Exact match (receiptLower is non-empty, so this cannot match a blank field)
    if (receiptLower == merchantLower || receiptLower == nameLower)
        return 1.0;

    // Contains match - skipped for blank fields because every string contains ""
    if (merchantLower.Length > 0 &&
        (merchantLower.Contains(receiptLower) || receiptLower.Contains(merchantLower)))
        return 0.90;

    if (nameLower.Length > 0 &&
        (nameLower.Contains(receiptLower) || receiptLower.Contains(nameLower)))
        return 0.85;

    // Word-based matching
    var receiptWords = ExtractWords(receiptLower);
    var merchantWords = ExtractWords(merchantLower);
    var nameWords = ExtractWords(nameLower);

    var merchantMatchRatio = CalculateWordMatchRatio(receiptWords, merchantWords);
    var nameMatchRatio = CalculateWordMatchRatio(receiptWords, nameWords);

    return Math.Max(merchantMatchRatio, nameMatchRatio);
}
/// <summary>
/// Splits text into a set of distinct words.
/// </summary>
/// <param name="text">Text to tokenize; it is used as given, with no case folding here.</param>
/// <returns>
/// The distinct tokens longer than one character, obtained by splitting on
/// space, '-', '_', '.', ',', '#', '/', '\' and '*'.
/// </returns>
private static HashSet<string> ExtractWords(string text)
{
    char[] delimiters = { ' ', '-', '_', '.', ',', '#', '/', '\\', '*' };
    var words = new HashSet<string>();

    foreach (var token in text.Split(delimiters, StringSplitOptions.RemoveEmptyEntries))
    {
        // Single characters carry no matching value.
        if (token.Length > 1)
            words.Add(token);
    }

    return words;
}
/// <summary>
/// Computes the fraction of the smaller word set that has a counterpart in the larger one.
/// </summary>
/// <remarks>
/// Two words are counterparts when they are equal or one contains the other.
/// Matches are counted over the smaller set (the first set on a tie) and divided by that
/// set's size, so the result always lies in [0, 1]. Counting over <paramref name="words1"/>
/// while dividing by the smaller size could previously yield values above 1.0.
/// </remarks>
/// <param name="words1">First word set.</param>
/// <param name="words2">Second word set.</param>
/// <returns>A ratio between 0 and 1; 0 when either set is empty.</returns>
private static double CalculateWordMatchRatio(HashSet<string> words1, HashSet<string> words2)
{
    if (words1.Count == 0 || words2.Count == 0)
        return 0;

    // On a tie keep words1 as the counted set.
    var smaller = words1.Count <= words2.Count ? words1 : words2;
    var larger = ReferenceEquals(smaller, words1) ? words2 : words1;

    int matches = 0;
    foreach (var word in smaller)
    {
        foreach (var other in larger)
        {
            // Contains covers equality as well as partial overlap in either direction.
            if (word.Contains(other) || other.Contains(word))
            {
                matches++;
                break;
            }
        }
    }

    return (double)matches / smaller.Count;
}
/// <summary>
/// Asks the LLM client to choose the best candidate transaction for a receipt.
/// </summary>
/// <param name="receipt">Receipt being matched.</param>
/// <param name="candidates">Scored candidates offered to the LLM.</param>
/// <returns>
/// The parsed decision, or <c>null</c> when the LLM call is unsuccessful, the reply cannot be
/// parsed into a decision, or any exception is thrown along the way (it is logged, not rethrown).
/// </returns>
private async Task<LlmMatchResult?> GetLlmMatchDecisionAsync(Receipt receipt, List<ScoredCandidate> candidates)
{
    try
    {
        var promptText = BuildLlmPrompt(receipt, candidates);

        _logger.LogInformation("Sending receipt matching prompt to LLM for receipt {ReceiptId}", receipt.Id);

        var reply = await _llmClient.SendTextPromptAsync(promptText);

        if (reply.IsSuccess)
        {
            _logger.LogInformation("LLM response: {Content}", reply.Content);
            return ParseLlmResponse(reply.Content, candidates);
        }

        _logger.LogWarning("LLM matching failed: {Error}", reply.ErrorMessage);
        return null;
    }
    catch (Exception failure)
    {
        // Best effort: a failed LLM call must not break the mapping flow.
        _logger.LogError(failure, "Error during LLM match decision");
        return null;
    }
}
/// <summary>
/// Builds the text prompt that asks the LLM to pick the best candidate for a receipt.
/// </summary>
/// <remarks>
/// The prompt lists the receipt's merchant, date, optional due date and total, then each
/// candidate under a 1-based index with its id, name, optional merchant, date, amount and
/// current score, and ends with the JSON shape the reply must follow
/// (<c>match_index</c>, <c>confidence</c>, <c>reason</c>).
/// </remarks>
/// <param name="receipt">Receipt to describe in the prompt.</param>
/// <param name="candidates">Candidates to list, in the order they should be indexed.</param>
/// <returns>The complete prompt text.</returns>
private static string BuildLlmPrompt(Receipt receipt, List<ScoredCandidate> candidates)
{
    var prompt = new StringBuilder();

    // Task statement and receipt details.
    prompt.AppendLine("You are matching a receipt to bank transactions. Analyze and pick the best match.")
          .AppendLine()
          .AppendLine("RECEIPT:")
          .AppendLine($" Merchant: {receipt.Merchant ?? "Unknown"}")
          .AppendLine($" Date: {receipt.ReceiptDate?.ToString("yyyy-MM-dd") ?? "Unknown"}");

    if (receipt.DueDate.HasValue)
    {
        prompt.AppendLine($" Due Date: {receipt.DueDate.Value:yyyy-MM-dd}");
    }

    prompt.AppendLine($" Total: {receipt.Total?.ToString("C") ?? "Unknown"}")
          .AppendLine()
          .AppendLine("CANDIDATE TRANSACTIONS:");

    // One entry per candidate; the bracketed number is what the LLM echoes back as match_index.
    var position = 0;
    foreach (var candidate in candidates)
    {
        position++;
        var tx = candidate.Transaction;

        prompt.AppendLine($" [{position}] ID={tx.Id}")
              .AppendLine($" Name: {tx.Name}");

        if (tx.Merchant != null)
        {
            prompt.AppendLine($" Merchant: {tx.Merchant.Name}");
        }

        prompt.AppendLine($" Date: {tx.Date:yyyy-MM-dd}")
              .AppendLine($" Amount: {tx.Amount:C}")
              .AppendLine($" Current Score: {candidate.Score:P0}")
              .AppendLine();
    }

    // Required reply format.
    prompt.AppendLine("Respond with JSON only:")
          .AppendLine("{")
          .AppendLine(" \"match_index\": <1-based index of best match, or 0 if none match>,")
          .AppendLine(" \"confidence\": <0.0 to 1.0>,")
          .AppendLine(" \"reason\": \"<brief explanation>\"")
          .AppendLine("}");

    return prompt.ToString();
}
/// <summary>
/// Parses the LLM's JSON reply into a match decision.
/// </summary>
/// <remarks>
/// The reply is expected to contain a JSON object with an integer <c>match_index</c>
/// (1-based position in <paramref name="candidates"/>) and a numeric <c>confidence</c>.
/// Text surrounding the object, such as Markdown code fences or a leading sentence, is
/// ignored by parsing only the span from the first '{' to the last '}'.
/// A confidence outside 0.0-1.0 breaks the requested format and is rejected, so a reply on
/// a different scale (e.g. 85) cannot pass the caller's confidence gate.
/// </remarks>
/// <param name="content">Raw reply text from the LLM; may be null or blank.</param>
/// <param name="candidates">Candidates in the same order they were listed in the prompt.</param>
/// <returns>
/// The chosen transaction id and confidence, or <c>null</c> when the reply is blank,
/// cannot be parsed, names no valid candidate, or carries an out-of-range confidence.
/// </returns>
private LlmMatchResult? ParseLlmResponse(string? content, List<ScoredCandidate> candidates)
{
    if (string.IsNullOrWhiteSpace(content))
        return null;

    try
    {
        // Keep only the outermost {...}; fall back to the raw text when no braces are found.
        var objectStart = content.IndexOf('{');
        var objectEnd = content.LastIndexOf('}');
        var payload = objectStart >= 0 && objectEnd > objectStart
            ? content.Substring(objectStart, objectEnd - objectStart + 1)
            : content;

        var json = JsonSerializer.Deserialize<JsonElement>(payload);

        var matchIndex = json.GetProperty("match_index").GetInt32();
        var confidence = json.GetProperty("confidence").GetDouble();

        // 0 means "none match"; anything beyond the list is an invalid pick.
        if (matchIndex <= 0 || matchIndex > candidates.Count)
            return null;

        if (confidence < 0.0 || confidence > 1.0)
        {
            _logger.LogWarning("LLM returned out-of-range confidence {Confidence}; ignoring match", confidence);
            return null;
        }

        return new LlmMatchResult
        {
            TransactionId = candidates[matchIndex - 1].Transaction.Id,
            Confidence = confidence
        };
    }
    catch (Exception ex)
    {
        // Malformed JSON, missing properties and wrong value kinds all end up here.
        _logger.LogWarning(ex, "Failed to parse LLM response: {Content}", content);
        return null;
    }
}
}
/// <summary>
/// A candidate transaction paired with the match score computed for it against a receipt.
/// </summary>
public class ScoredCandidate
{
/// <summary>The candidate transaction.</summary>
public required Transaction Transaction { get; set; }
/// <summary>Match score for the transaction; the mapper fills it from CalculateMatchScore, which caps it at 1.0.</summary>
public double Score { get; set; }
}
/// <summary>
/// The LLM's decision about which candidate transaction matches a receipt.
/// </summary>
public class LlmMatchResult
{
/// <summary>Id of the transaction the LLM selected.</summary>
public long TransactionId { get; set; }
/// <summary>Confidence value taken from the "confidence" field of the LLM's JSON reply.</summary>
public double Confidence { get; set; }
}
public class ReceiptAutoMapResult