diff --git a/MoneyMap/Services/ReceiptAutoMapper.cs b/MoneyMap/Services/ReceiptAutoMapper.cs index 1fbbb82..fb628c2 100644 --- a/MoneyMap/Services/ReceiptAutoMapper.cs +++ b/MoneyMap/Services/ReceiptAutoMapper.cs @@ -1,6 +1,8 @@ using Microsoft.EntityFrameworkCore; using MoneyMap.Data; using MoneyMap.Models; +using System.Text; +using System.Text.Json; namespace MoneyMap.Services { @@ -8,17 +10,30 @@ namespace MoneyMap.Services { Task AutoMapReceiptAsync(long receiptId); Task AutoMapUnmappedReceiptsAsync(); + Task> GetScoredCandidatesAsync(long receiptId); } public class ReceiptAutoMapper : IReceiptAutoMapper { private readonly MoneyMapContext _db; private readonly IReceiptManager _receiptManager; + private readonly LlamaCppVisionClient _llmClient; + private readonly ILogger _logger; - public ReceiptAutoMapper(MoneyMapContext db, IReceiptManager receiptManager) + // Confidence thresholds + private const double AutoMapThreshold = 0.85; // Auto-map if score >= 85% + private const double LlmReviewThreshold = 0.50; // Use LLM if score between 50-85% + + public ReceiptAutoMapper( + MoneyMapContext db, + IReceiptManager receiptManager, + LlamaCppVisionClient llmClient, + ILogger logger) { _db = db; _receiptManager = receiptManager; + _llmClient = llmClient; + _logger = logger; } public async Task AutoMapReceiptAsync(long receiptId) @@ -30,38 +45,70 @@ namespace MoneyMap.Services if (receipt == null) return ReceiptAutoMapResult.Failure("Receipt not found."); - // If already mapped, skip if (receipt.TransactionId.HasValue) return ReceiptAutoMapResult.AlreadyMapped(receipt.TransactionId.Value); - // If receipt has not been parsed (no merchant, date, or total), skip if (string.IsNullOrWhiteSpace(receipt.Merchant) && !receipt.ReceiptDate.HasValue && !receipt.Total.HasValue) return ReceiptAutoMapResult.NotParsed(); - // Find matching transactions based on parsed data - var candidateTransactions = await FindMatchingTransactionsAsync(receipt); + var scoredCandidates = await FindAndScoreCandidatesAsync(receipt); - if (candidateTransactions.Count == 0) + if (scoredCandidates.Count == 0) return ReceiptAutoMapResult.NoMatch(); - if (candidateTransactions.Count > 1) - return ReceiptAutoMapResult.WithMultipleMatches(candidateTransactions); + var bestMatch = scoredCandidates[0]; - // Single match found - auto-map it - var transaction = candidateTransactions[0]; - var success = await _receiptManager.MapReceiptToTransactionAsync(receiptId, transaction.Id); + // High confidence - auto-map directly + if (bestMatch.Score >= AutoMapThreshold) + { + _logger.LogInformation( + "Auto-mapping receipt {ReceiptId} to transaction {TransactionId} with score {Score:P0}", + receiptId, bestMatch.Transaction.Id, bestMatch.Score); - if (success) - return ReceiptAutoMapResult.Success(transaction.Id); - else - return ReceiptAutoMapResult.Failure("Failed to map receipt to transaction."); + var success = await _receiptManager.MapReceiptToTransactionAsync(receiptId, bestMatch.Transaction.Id); + return success + ? ReceiptAutoMapResult.Success(bestMatch.Transaction.Id) + : ReceiptAutoMapResult.Failure("Failed to map receipt to transaction."); + } + + // Medium confidence - use LLM to decide + if (bestMatch.Score >= LlmReviewThreshold) + { + var topCandidates = scoredCandidates.Take(5).ToList(); + var llmResult = await GetLlmMatchDecisionAsync(receipt, topCandidates); + + if (llmResult != null && llmResult.Confidence >= 0.7) + { + _logger.LogInformation( + "LLM matched receipt {ReceiptId} to transaction {TransactionId} with confidence {Confidence:P0}", + receiptId, llmResult.TransactionId, llmResult.Confidence); + + var success = await _receiptManager.MapReceiptToTransactionAsync(receiptId, llmResult.TransactionId); + return success + ? ReceiptAutoMapResult.Success(llmResult.TransactionId) + : ReceiptAutoMapResult.Failure("Failed to map receipt to transaction."); + } + + // LLM uncertain - return multiple matches for manual review + return ReceiptAutoMapResult.WithMultipleMatches( + topCandidates.Select(c => c.Transaction).ToList()); + } + + // Low confidence - no good matches + if (scoredCandidates.Count > 1) + { + return ReceiptAutoMapResult.WithMultipleMatches( + scoredCandidates.Take(5).Select(c => c.Transaction).ToList()); + } + + return ReceiptAutoMapResult.NoMatch(); } public async Task AutoMapUnmappedReceiptsAsync() { var unmappedReceipts = await _db.Receipts .Where(r => r.TransactionId == null) - .Where(r => r.Merchant != null || r.ReceiptDate != null || r.Total != null) // Only parsed receipts + .Where(r => r.Merchant != null || r.ReceiptDate != null || r.Total != null) .ToListAsync(); var result = new BulkAutoMapResult(); @@ -71,105 +118,60 @@ namespace MoneyMap.Services var mapResult = await AutoMapReceiptAsync(receipt.Id); if (mapResult.Status == AutoMapStatus.Success) - { result.MappedCount++; - } else if (mapResult.Status == AutoMapStatus.MultipleMatches) - { result.MultipleMatchesCount++; - } else if (mapResult.Status == AutoMapStatus.NoMatch) - { result.NoMatchCount++; - } } result.TotalProcessed = unmappedReceipts.Count; return result; } - private async Task> FindMatchingTransactionsAsync(Receipt receipt) + public async Task> GetScoredCandidatesAsync(long receiptId) { + var receipt = await _db.Receipts + .FirstOrDefaultAsync(r => r.Id == receiptId); + + if (receipt == null) + return new List(); + + return await FindAndScoreCandidatesAsync(receipt); + } + + private async Task> FindAndScoreCandidatesAsync(Receipt receipt) + { + // Get transactions in a reasonable date range var query = _db.Transactions .Include(t => t.Card) .Include(t => t.Account) .Include(t => t.Merchant) .AsQueryable(); - // Start with date range filter - if (receipt.ReceiptDate.HasValue && receipt.DueDate.HasValue) + // Date range: use receipt date or due date + // Transactions can't occur before the receipt date (you get a receipt when you buy something) + DateTime? targetDate = receipt.ReceiptDate; + DateTime? dueDate = receipt.DueDate; + + if (targetDate.HasValue || dueDate.HasValue) { - // For bills with due dates: use range from bill date to due date + 5 days - // (to account for auto-pay processing delays, weekends, etc.) - var minDate = receipt.ReceiptDate.Value; - var maxDate = receipt.DueDate.Value.AddDays(5); - query = query.Where(t => t.Date >= minDate && t.Date <= maxDate); - } - else if (receipt.ReceiptDate.HasValue) - { - // For regular receipts: allow +/- 3 days for transaction date to account for processing delays - var minDate = receipt.ReceiptDate.Value.AddDays(-3); - var maxDate = receipt.ReceiptDate.Value.AddDays(3); + // Min date is the receipt date - transactions can't precede the receipt + var minDate = targetDate ?? dueDate!.Value; + var maxDate = (dueDate ?? targetDate!.Value).AddDays(7); query = query.Where(t => t.Date >= minDate && t.Date <= maxDate); } else { - // If no receipt date, can't narrow down effectively - return new List(); + // No date info - can't match reliably + return new List(); } - // Get candidates var candidates = await query.ToListAsync(); - // Sort by merchant/name relevance using word matching if merchant available - if (!string.IsNullOrWhiteSpace(receipt.Merchant)) - { - var receiptWords = receipt.Merchant.ToLower().Split(new[] { ' ', '-', '_', '.' }, StringSplitOptions.RemoveEmptyEntries); - - candidates = candidates - .OrderByDescending(t => - { - var merchantName = t.Merchant?.Name?.ToLower() ?? ""; - var transactionName = t.Name?.ToLower() ?? ""; - - // Exact match - if (merchantName == receipt.Merchant.ToLower() || transactionName == receipt.Merchant.ToLower()) - return 1000; - - // Count matching words - var merchantWords = merchantName.Split(new[] { ' ', '-', '_', '.' }, StringSplitOptions.RemoveEmptyEntries); - var transactionWords = transactionName.Split(new[] { ' ', '-', '_', '.' }, StringSplitOptions.RemoveEmptyEntries); - - var merchantMatches = receiptWords.Count(rw => merchantWords.Any(mw => mw.Contains(rw) || rw.Contains(mw))); - var transactionMatches = receiptWords.Count(rw => transactionWords.Any(tw => tw.Contains(rw) || rw.Contains(tw))); - - // Return the higher match count - return Math.Max(merchantMatches * 10, transactionMatches * 10); - }) - .ThenByDescending(t => t.Date) - .ToList(); - } - - // If we have a total amount, filter by amount match (±10% tolerance) - if (receipt.Total.HasValue) - { - var receiptTotal = Math.Abs(receipt.Total.Value); - var tolerance = receiptTotal * 0.10m; // 10% tolerance - var minAmount = receiptTotal - tolerance; - var maxAmount = receiptTotal + tolerance; - - candidates = candidates - .Where(t => - { - var transactionAmount = Math.Abs(t.Amount); - return transactionAmount >= minAmount && transactionAmount <= maxAmount; - }) - .ToList(); - } - // Exclude transactions that already have receipts var transactionsWithReceipts = await _db.Receipts - .Where(r => r.TransactionId != null) + .Where(r => r.TransactionId != null && r.Id != receipt.Id) .Select(r => r.TransactionId!.Value) .Distinct() .ToListAsync(); @@ -178,8 +180,273 @@ namespace MoneyMap.Services .Where(t => !transactionsWithReceipts.Contains(t.Id)) .ToList(); - return candidates; + // Score each candidate + var scored = candidates + .Select(t => new ScoredCandidate + { + Transaction = t, + Score = CalculateMatchScore(receipt, t) + }) + .Where(s => s.Score > 0.1) // Filter out very poor matches + .OrderByDescending(s => s.Score) + .ToList(); + + return scored; } + + private double CalculateMatchScore(Receipt receipt, Transaction transaction) + { + double score = 0; + double totalWeight = 0; + + // Amount matching (weight: 40%) + if (receipt.Total.HasValue) + { + const double amountWeight = 0.40; + totalWeight += amountWeight; + + var receiptAmount = Math.Abs(receipt.Total.Value); + var transactionAmount = Math.Abs(transaction.Amount); + + if (receiptAmount > 0) + { + var difference = (double)(Math.Abs(receiptAmount - transactionAmount) / receiptAmount); + + if (difference == 0) + score += amountWeight * 1.0; + else if (difference <= 0.01) // Within 1% + score += amountWeight * 0.95; + else if (difference <= 0.05) // Within 5% + score += amountWeight * 0.80; + else if (difference <= 0.10) // Within 10% + score += amountWeight * 0.60; + else if (difference <= 0.20) // Within 20% + score += amountWeight * 0.30; + // Beyond 20% = 0 points + } + } + + // Date matching (weight: 25%) + if (receipt.ReceiptDate.HasValue) + { + const double dateWeight = 0.25; + totalWeight += dateWeight; + + var daysDiff = Math.Abs((transaction.Date - receipt.ReceiptDate.Value).TotalDays); + + if (daysDiff == 0) + score += dateWeight * 1.0; + else if (daysDiff <= 1) + score += dateWeight * 0.90; + else if (daysDiff <= 3) + score += dateWeight * 0.70; + else if (daysDiff <= 5) + score += dateWeight * 0.50; + else if (daysDiff <= 7) + score += dateWeight * 0.30; + // Beyond 7 days = 0 points + } + + // Due date matching for bills (weight: 10% bonus) + if (receipt.DueDate.HasValue) + { + const double dueDateWeight = 0.10; + totalWeight += dueDateWeight; + + var daysDiff = Math.Abs((transaction.Date - receipt.DueDate.Value).TotalDays); + + if (daysDiff <= 1) + score += dueDateWeight * 1.0; + else if (daysDiff <= 3) + score += dueDateWeight * 0.70; + else if (daysDiff <= 5) + score += dueDateWeight * 0.40; + } + + // Merchant/Name matching (weight: 35%) + if (!string.IsNullOrWhiteSpace(receipt.Merchant)) + { + const double merchantWeight = 0.35; + totalWeight += merchantWeight; + + var merchantScore = CalculateMerchantMatchScore( + receipt.Merchant, + transaction.Merchant?.Name, + transaction.Name); + + score += merchantWeight * merchantScore; + } + + // Normalize score if we didn't have all data points + if (totalWeight > 0 && totalWeight < 1.0) + { + score = score / totalWeight; + } + + return Math.Min(score, 1.0); + } + + private double CalculateMerchantMatchScore(string receiptMerchant, string? transactionMerchant, string? transactionName) + { + var receiptLower = receiptMerchant.ToLowerInvariant().Trim(); + var merchantLower = transactionMerchant?.ToLowerInvariant().Trim() ?? ""; + var nameLower = transactionName?.ToLowerInvariant().Trim() ?? ""; + + // Exact match + if (receiptLower == merchantLower || receiptLower == nameLower) + return 1.0; + + // Contains match + if (merchantLower.Contains(receiptLower) || receiptLower.Contains(merchantLower)) + return 0.90; + if (nameLower.Contains(receiptLower) || receiptLower.Contains(nameLower)) + return 0.85; + + // Word-based matching + var receiptWords = ExtractWords(receiptLower); + var merchantWords = ExtractWords(merchantLower); + var nameWords = ExtractWords(nameLower); + + var merchantMatchRatio = CalculateWordMatchRatio(receiptWords, merchantWords); + var nameMatchRatio = CalculateWordMatchRatio(receiptWords, nameWords); + + return Math.Max(merchantMatchRatio, nameMatchRatio); + } + + private static HashSet ExtractWords(string text) + { + return text + .Split(new[] { ' ', '-', '_', '.', ',', '#', '/', '\\', '*' }, StringSplitOptions.RemoveEmptyEntries) + .Where(w => w.Length > 1) // Skip single chars + .ToHashSet(); + } + + private static double CalculateWordMatchRatio(HashSet words1, HashSet words2) + { + if (words1.Count == 0 || words2.Count == 0) + return 0; + + int matches = 0; + foreach (var w1 in words1) + { + foreach (var w2 in words2) + { + if (w1 == w2 || w1.Contains(w2) || w2.Contains(w1)) + { + matches++; + break; + } + } + } + + // Return ratio of matched words from the smaller set + var smallerCount = Math.Min(words1.Count, words2.Count); + return (double)matches / smallerCount; + } + + private async Task GetLlmMatchDecisionAsync(Receipt receipt, List candidates) + { + try + { + var prompt = BuildLlmPrompt(receipt, candidates); + + _logger.LogInformation("Sending receipt matching prompt to LLM for receipt {ReceiptId}", receipt.Id); + + var result = await _llmClient.SendTextPromptAsync(prompt); + + if (!result.IsSuccess) + { + _logger.LogWarning("LLM matching failed: {Error}", result.ErrorMessage); + return null; + } + + _logger.LogInformation("LLM response: {Content}", result.Content); + + return ParseLlmResponse(result.Content, candidates); + } + catch (Exception ex) + { + _logger.LogError(ex, "Error during LLM match decision"); + return null; + } + } + + private static string BuildLlmPrompt(Receipt receipt, List candidates) + { + var sb = new StringBuilder(); + sb.AppendLine("You are matching a receipt to bank transactions. Analyze and pick the best match."); + sb.AppendLine(); + sb.AppendLine("RECEIPT:"); + sb.AppendLine($" Merchant: {receipt.Merchant ?? "Unknown"}"); + sb.AppendLine($" Date: {receipt.ReceiptDate?.ToString("yyyy-MM-dd") ?? "Unknown"}"); + if (receipt.DueDate.HasValue) + sb.AppendLine($" Due Date: {receipt.DueDate.Value:yyyy-MM-dd}"); + sb.AppendLine($" Total: {receipt.Total?.ToString("C") ?? "Unknown"}"); + sb.AppendLine(); + sb.AppendLine("CANDIDATE TRANSACTIONS:"); + + for (int i = 0; i < candidates.Count; i++) + { + var t = candidates[i].Transaction; + sb.AppendLine($" [{i + 1}] ID={t.Id}"); + sb.AppendLine($" Name: {t.Name}"); + if (t.Merchant != null) + sb.AppendLine($" Merchant: {t.Merchant.Name}"); + sb.AppendLine($" Date: {t.Date:yyyy-MM-dd}"); + sb.AppendLine($" Amount: {t.Amount:C}"); + sb.AppendLine($" Current Score: {candidates[i].Score:P0}"); + sb.AppendLine(); + } + + sb.AppendLine("Respond with JSON only:"); + sb.AppendLine("{"); + sb.AppendLine(" \"match_index\": <1-based index of best match, or 0 if none match>,"); + sb.AppendLine(" \"confidence\": <0.0 to 1.0>,"); + sb.AppendLine(" \"reason\": \"\""); + sb.AppendLine("}"); + + return sb.ToString(); + } + + private LlmMatchResult? ParseLlmResponse(string? content, List candidates) + { + if (string.IsNullOrWhiteSpace(content)) + return null; + + try + { + var json = JsonSerializer.Deserialize(content); + + var matchIndex = json.GetProperty("match_index").GetInt32(); + var confidence = json.GetProperty("confidence").GetDouble(); + + if (matchIndex <= 0 || matchIndex > candidates.Count) + return null; + + return new LlmMatchResult + { + TransactionId = candidates[matchIndex - 1].Transaction.Id, + Confidence = confidence + }; + } + catch (Exception ex) + { + _logger.LogWarning(ex, "Failed to parse LLM response: {Content}", content); + return null; + } + } + } + + public class ScoredCandidate + { + public required Transaction Transaction { get; set; } + public double Score { get; set; } + } + + public class LlmMatchResult + { + public long TransactionId { get; set; } + public double Confidence { get; set; } } public class ReceiptAutoMapResult