using Microsoft.EntityFrameworkCore;
using MoneyMap.Data;
using MoneyMap.Models;
using System.Text;
using System.Text.Json;

namespace MoneyMap.Services
{
    /// <summary>
    /// Matches parsed receipts to bank transactions.
    /// </summary>
    public interface IReceiptAutoMapper
    {
        /// <summary>Attempts to map a single receipt to its best matching transaction.</summary>
        /// <param name="receiptId">Id of the receipt to map.</param>
        /// <returns>The outcome of the mapping attempt.</returns>
        Task<ReceiptAutoMapResult> AutoMapReceiptAsync(long receiptId);

        /// <summary>Runs auto-mapping over every unmapped receipt that has at least one parsed field.</summary>
        /// <returns>Counters summarising the outcomes.</returns>
        Task<BulkAutoMapResult> AutoMapUnmappedReceiptsAsync();

        /// <summary>Returns the scored candidate transactions for a receipt, best first.</summary>
        /// <param name="receiptId">Id of the receipt to score candidates for.</param>
        /// <returns>Scored candidates, or an empty list when the receipt does not exist.</returns>
        Task<List<ScoredCandidate>> GetScoredCandidatesAsync(long receiptId);
    }

    /// <summary>
    /// Scores candidate transactions for a receipt on amount, date, due date and merchant name,
    /// maps high-confidence matches directly and asks the LLM to arbitrate medium-confidence ones.
    /// </summary>
    public class ReceiptAutoMapper : IReceiptAutoMapper
    {
        private readonly MoneyMapContext _db;
        private readonly IReceiptManager _receiptManager;
        private readonly LlamaCppVisionClient _llmClient;
        private readonly ILogger<ReceiptAutoMapper> _logger;

        // Confidence thresholds
        private const double AutoMapThreshold = 0.85;   // Auto-map if score >= 85%
        private const double LlmReviewThreshold = 0.50; // Use LLM if score between 50-85%
        private const double LlmAcceptThreshold = 0.70; // Accept the LLM's pick if it is >= 70% confident
        private const double MinCandidateScore = 0.1;   // Candidates at or below this score are dropped
        private const int MaxReviewCandidates = 5;      // Candidates shown to the LLM / returned for manual review

        private static readonly char[] WordSeparators = { ' ', '-', '_', '.', ',', '#', '/', '\\', '*' };

        public ReceiptAutoMapper(
            MoneyMapContext db,
            IReceiptManager receiptManager,
            LlamaCppVisionClient llmClient,
            ILogger<ReceiptAutoMapper> logger)
        {
            _db = db;
            _receiptManager = receiptManager;
            _llmClient = llmClient;
            _logger = logger;
        }

        /// <inheritdoc />
        public async Task<ReceiptAutoMapResult> AutoMapReceiptAsync(long receiptId)
        {
            var receipt = await _db.Receipts
                .Include(r => r.Transaction)
                .FirstOrDefaultAsync(r => r.Id == receiptId);

            if (receipt == null)
            {
                return ReceiptAutoMapResult.Failure("Receipt not found.");
            }

            if (receipt.TransactionId.HasValue)
            {
                return ReceiptAutoMapResult.AlreadyMapped(receipt.TransactionId.Value);
            }

            // A receipt with no merchant, date or total has nothing to match on.
            if (string.IsNullOrWhiteSpace(receipt.Merchant) && !receipt.ReceiptDate.HasValue && !receipt.Total.HasValue)
            {
                return ReceiptAutoMapResult.NotParsed();
            }

            var scoredCandidates = await FindAndScoreCandidatesAsync(receipt);
            if (scoredCandidates.Count == 0)
            {
                return ReceiptAutoMapResult.NoMatch();
            }

            var bestMatch = scoredCandidates[0];

            // High confidence - auto-map directly
            if (bestMatch.Score >= AutoMapThreshold)
            {
                _logger.LogInformation(
                    "Auto-mapping receipt {ReceiptId} to transaction {TransactionId} with score {Score:P0}",
                    receiptId, bestMatch.Transaction.Id, bestMatch.Score);

                return await MapAsync(receiptId, bestMatch.Transaction.Id);
            }

            // Medium confidence - use LLM to decide
            if (bestMatch.Score >= LlmReviewThreshold)
            {
                var topCandidates = scoredCandidates.Take(MaxReviewCandidates).ToList();
                var llmResult = await GetLlmMatchDecisionAsync(receipt, topCandidates);

                if (llmResult != null && llmResult.Confidence >= LlmAcceptThreshold)
                {
                    _logger.LogInformation(
                        "LLM matched receipt {ReceiptId} to transaction {TransactionId} with confidence {Confidence:P0}",
                        receiptId, llmResult.TransactionId, llmResult.Confidence);

                    return await MapAsync(receiptId, llmResult.TransactionId);
                }

                // LLM uncertain - return multiple matches for manual review
                return ReceiptAutoMapResult.WithMultipleMatches(
                    topCandidates.Select(c => c.Transaction).ToList());
            }

            // Low confidence - only worth surfacing when there is more than one option
            if (scoredCandidates.Count > 1)
            {
                return ReceiptAutoMapResult.WithMultipleMatches(
                    scoredCandidates.Take(MaxReviewCandidates).Select(c => c.Transaction).ToList());
            }

            return ReceiptAutoMapResult.NoMatch();
        }

        /// <inheritdoc />
        public async Task<BulkAutoMapResult> AutoMapUnmappedReceiptsAsync()
        {
            // Only the ids are needed here; AutoMapReceiptAsync loads each receipt itself.
            var unmappedReceiptIds = await _db.Receipts
                .Where(r => r.TransactionId == null)
                .Where(r => r.Merchant != null || r.ReceiptDate != null || r.Total != null)
                .Select(r => r.Id)
                .ToListAsync();

            var result = new BulkAutoMapResult();

            foreach (var receiptId in unmappedReceiptIds)
            {
                var mapResult = await AutoMapReceiptAsync(receiptId);

                // Other statuses (Failed, NotParsed, AlreadyMapped) only show up in TotalProcessed.
                switch (mapResult.Status)
                {
                    case AutoMapStatus.Success:
                        result.MappedCount++;
                        break;
                    case AutoMapStatus.MultipleMatches:
                        result.MultipleMatchesCount++;
                        break;
                    case AutoMapStatus.NoMatch:
                        result.NoMatchCount++;
                        break;
                }
            }

            result.TotalProcessed = unmappedReceiptIds.Count;
            return result;
        }

        /// <inheritdoc />
        public async Task<List<ScoredCandidate>> GetScoredCandidatesAsync(long receiptId)
        {
            var receipt = await _db.Receipts
                .FirstOrDefaultAsync(r => r.Id == receiptId);

            if (receipt == null)
            {
                return new List<ScoredCandidate>();
            }

            return await FindAndScoreCandidatesAsync(receipt);
        }

        /// <summary>
        /// Maps the receipt to the transaction through the receipt manager and wraps the outcome.
        /// </summary>
        private async Task<ReceiptAutoMapResult> MapAsync(long receiptId, long transactionId)
        {
            var success = await _receiptManager.MapReceiptToTransactionAsync(receiptId, transactionId);
            return success
                ? ReceiptAutoMapResult.Success(transactionId)
                : ReceiptAutoMapResult.Failure("Failed to map receipt to transaction.");
        }

        /// <summary>
        /// Loads transactions inside the receipt's date window that are not linked to another
        /// receipt, scores each one and returns those above <see cref="MinCandidateScore"/>, best first.
        /// </summary>
        /// <param name="receipt">The receipt to find candidates for.</param>
        /// <returns>Scored candidates; empty when the receipt has neither a receipt date nor a due date.</returns>
        private async Task<List<ScoredCandidate>> FindAndScoreCandidatesAsync(Receipt receipt)
        {
            DateTime? targetDate = receipt.ReceiptDate;
            DateTime? dueDate = receipt.DueDate;

            // No date info - can't match reliably
            if (!targetDate.HasValue && !dueDate.HasValue)
            {
                return new List<ScoredCandidate>();
            }

            // Window starts at the receipt date (falling back to the due date) and ends
            // 7 days after the due date (falling back to the receipt date).
            var minDate = targetDate ?? dueDate!.Value;
            var maxDate = (dueDate ?? targetDate!.Value).AddDays(7);

            var candidates = await _db.Transactions
                .Include(t => t.Card)
                .Include(t => t.Account)
                .Include(t => t.Merchant)
                .Where(t => t.Date >= minDate && t.Date <= maxDate)
                .ToListAsync();

            if (candidates.Count == 0)
            {
                return new List<ScoredCandidate>();
            }

            // Exclude transactions that already have receipts (other than this one).
            var receiptId = receipt.Id;
            var claimedIds = await _db.Receipts
                .Where(r => r.TransactionId != null && r.Id != receiptId)
                .Select(r => r.TransactionId!.Value)
                .Distinct()
                .ToListAsync();
            var claimed = claimedIds.ToHashSet(); // O(1) membership instead of a list scan per candidate

            return candidates
                .Where(t => !claimed.Contains(t.Id))
                .Select(t => new ScoredCandidate
                {
                    Transaction = t,
                    Score = CalculateMatchScore(receipt, t)
                })
                .Where(s => s.Score > MinCandidateScore) // Filter out very poor matches
                .OrderByDescending(s => s.Score)
                .ToList();
        }

        /// <summary>
        /// Computes a 0..1 match score from the data points the receipt actually has:
        /// amount (0.40), receipt date (0.25), due date (0.10) and merchant name (0.35).
        /// </summary>
        /// <param name="receipt">The receipt being matched.</param>
        /// <param name="transaction">The candidate transaction.</param>
        /// <returns>The score, capped at 1.0.</returns>
        private static double CalculateMatchScore(Receipt receipt, Transaction transaction)
        {
            double score = 0;
            double totalWeight = 0;

            // Amount matching (weight: 40%)
            if (receipt.Total.HasValue)
            {
                const double amountWeight = 0.40;
                totalWeight += amountWeight;

                var receiptAmount = Math.Abs(receipt.Total.Value);
                var transactionAmount = Math.Abs(transaction.Amount);

                // A zero receipt total has no meaningful relative difference; it earns no points.
                if (receiptAmount > 0)
                {
                    var difference = (double)(Math.Abs(receiptAmount - transactionAmount) / receiptAmount);
                    var amountFactor = difference switch
                    {
                        0d => 1.0,
                        <= 0.01 => 0.95, // Within 1%
                        <= 0.05 => 0.80, // Within 5%
                        <= 0.10 => 0.60, // Within 10%
                        <= 0.20 => 0.30, // Within 20%
                        _ => 0.0         // Beyond 20% = 0 points
                    };
                    score += amountWeight * amountFactor;
                }
            }

            // Date matching (weight: 25%)
            if (receipt.ReceiptDate.HasValue)
            {
                const double dateWeight = 0.25;
                totalWeight += dateWeight;

                var daysDiff = Math.Abs((transaction.Date - receipt.ReceiptDate.Value).TotalDays);
                var dateFactor = daysDiff switch
                {
                    0d => 1.0,
                    <= 1 => 0.90,
                    <= 3 => 0.70,
                    <= 5 => 0.50,
                    <= 7 => 0.30,
                    _ => 0.0 // Beyond 7 days = 0 points
                };
                score += dateWeight * dateFactor;
            }

            // Due date matching for bills (weight: 10%)
            if (receipt.DueDate.HasValue)
            {
                const double dueDateWeight = 0.10;
                totalWeight += dueDateWeight;

                var daysDiff = Math.Abs((transaction.Date - receipt.DueDate.Value).TotalDays);
                var dueDateFactor = daysDiff switch
                {
                    <= 1 => 1.0,
                    <= 3 => 0.70,
                    <= 5 => 0.40,
                    _ => 0.0
                };
                score += dueDateWeight * dueDateFactor;
            }

            // Merchant/Name matching (weight: 35%)
            if (!string.IsNullOrWhiteSpace(receipt.Merchant))
            {
                const double merchantWeight = 0.35;
                totalWeight += merchantWeight;

                var merchantScore = CalculateMerchantMatchScore(
                    receipt.Merchant,
                    transaction.Merchant?.Name,
                    transaction.Name);

                score += merchantWeight * merchantScore;
            }

            // Normalize score if we didn't have all data points.
            // NOTE(review): with all four data points present the weights sum to 1.10 and the score
            // is NOT rescaled, so the due date acts as a bonus there; the result is capped below.
            if (totalWeight > 0 && totalWeight < 1.0)
            {
                score = score / totalWeight;
            }

            return Math.Min(score, 1.0);
        }

        /// <summary>
        /// Scores how well the receipt's merchant text matches the transaction's merchant name
        /// or raw transaction name: 1.0 exact, 0.90 / 0.85 containment, otherwise word overlap.
        /// </summary>
        /// <param name="receiptMerchant">Merchant text from the receipt.</param>
        /// <param name="transactionMerchant">Name of the transaction's linked merchant, if any.</param>
        /// <param name="transactionName">Raw transaction name, if any.</param>
        /// <returns>A value between 0 and 1.</returns>
        private static double CalculateMerchantMatchScore(string receiptMerchant, string? transactionMerchant, string? transactionName)
        {
            var receiptLower = receiptMerchant.ToLowerInvariant().Trim();
            var merchantLower = transactionMerchant?.ToLowerInvariant().Trim() ?? "";
            var nameLower = transactionName?.ToLowerInvariant().Trim() ?? "";

            if (receiptLower.Length == 0)
            {
                return 0;
            }

            // Exact match
            if (receiptLower == merchantLower || receiptLower == nameLower)
            {
                return 1.0;
            }

            // Contains match. An empty string is contained in every string, so a missing
            // merchant or name must be excluded or it would count as a strong match.
            if (merchantLower.Length > 0
                && (merchantLower.Contains(receiptLower) || receiptLower.Contains(merchantLower)))
            {
                return 0.90;
            }

            if (nameLower.Length > 0
                && (nameLower.Contains(receiptLower) || receiptLower.Contains(nameLower)))
            {
                return 0.85;
            }

            // Word-based matching
            var receiptWords = ExtractWords(receiptLower);
            var merchantMatchRatio = CalculateWordMatchRatio(receiptWords, ExtractWords(merchantLower));
            var nameMatchRatio = CalculateWordMatchRatio(receiptWords, ExtractWords(nameLower));

            return Math.Max(merchantMatchRatio, nameMatchRatio);
        }

        /// <summary>
        /// Splits text on common separators and returns the distinct words longer than one character.
        /// </summary>
        private static HashSet<string> ExtractWords(string text)
        {
            return text
                .Split(WordSeparators, StringSplitOptions.RemoveEmptyEntries)
                .Where(w => w.Length > 1) // Skip single chars
                .ToHashSet();
        }

        /// <summary>
        /// Returns the fraction of words in the smaller set that equal, contain, or are contained
        /// in some word of the larger set. Always between 0 and 1; 0 when either set is empty.
        /// </summary>
        private static double CalculateWordMatchRatio(HashSet<string> words1, HashSet<string> words2)
        {
            if (words1.Count == 0 || words2.Count == 0)
            {
                return 0;
            }

            // Count over the smaller set so the numerator can never exceed the denominator.
            var (smaller, larger) = words1.Count <= words2.Count
                ? (words1, words2)
                : (words2, words1);

            var matches = smaller.Count(s => larger.Any(l => s.Contains(l) || l.Contains(s)));

            return (double)matches / smaller.Count;
        }

        /// <summary>
        /// Asks the LLM to pick the best candidate for the receipt.
        /// </summary>
        /// <returns>The LLM's pick, or null when the call fails or the response is unusable.</returns>
        private async Task<LlmMatchResult?> GetLlmMatchDecisionAsync(Receipt receipt, List<ScoredCandidate> candidates)
        {
            try
            {
                var prompt = BuildLlmPrompt(receipt, candidates);

                _logger.LogInformation("Sending receipt matching prompt to LLM for receipt {ReceiptId}", receipt.Id);

                var result = await _llmClient.SendTextPromptAsync(prompt);

                if (!result.IsSuccess)
                {
                    _logger.LogWarning("LLM matching failed: {Error}", result.ErrorMessage);
                    return null;
                }

                _logger.LogInformation("LLM response: {Content}", result.Content);

                return ParseLlmResponse(result.Content, candidates);
            }
            catch (Exception ex)
            {
                // Best effort: an LLM failure falls back to manual review rather than failing the mapping.
                _logger.LogError(ex, "Error during LLM match decision");
                return null;
            }
        }

        /// <summary>
        /// Builds the text prompt listing the receipt and the numbered candidate transactions.
        /// </summary>
        private static string BuildLlmPrompt(Receipt receipt, List<ScoredCandidate> candidates)
        {
            var sb = new StringBuilder();
            sb.AppendLine("You are matching a receipt to bank transactions. Analyze and pick the best match.");
            sb.AppendLine();
            sb.AppendLine("RECEIPT:");
            sb.AppendLine($" Merchant: {receipt.Merchant ?? "Unknown"}");
            sb.AppendLine($" Date: {receipt.ReceiptDate?.ToString("yyyy-MM-dd") ?? "Unknown"}");
            if (receipt.DueDate.HasValue)
            {
                sb.AppendLine($" Due Date: {receipt.DueDate.Value:yyyy-MM-dd}");
            }
            sb.AppendLine($" Total: {receipt.Total?.ToString("C") ?? "Unknown"}");
            sb.AppendLine();
            sb.AppendLine("CANDIDATE TRANSACTIONS:");

            for (int i = 0; i < candidates.Count; i++)
            {
                var t = candidates[i].Transaction;
                sb.AppendLine($" [{i + 1}] ID={t.Id}");
                sb.AppendLine($" Name: {t.Name}");
                if (t.Merchant != null)
                {
                    sb.AppendLine($" Merchant: {t.Merchant.Name}");
                }
                sb.AppendLine($" Date: {t.Date:yyyy-MM-dd}");
                sb.AppendLine($" Amount: {t.Amount:C}");
                sb.AppendLine($" Current Score: {candidates[i].Score:P0}");
                sb.AppendLine();
            }

            sb.AppendLine("Respond with JSON only:");
            sb.AppendLine("{");
            sb.AppendLine(" \"match_index\": <1-based index of best match, or 0 if none match>,");
            sb.AppendLine(" \"confidence\": <0.0 to 1.0>,");
            sb.AppendLine(" \"reason\": \"<brief explanation>\"");
            sb.AppendLine("}");

            return sb.ToString();
        }

        /// <summary>
        /// Parses the LLM's JSON answer into a match result.
        /// </summary>
        /// <param name="content">Raw LLM response text.</param>
        /// <param name="candidates">The candidates that were listed in the prompt, in prompt order.</param>
        /// <returns>
        /// The chosen transaction and confidence, or null when the response is empty, is not valid
        /// JSON, picks no candidate / an out-of-range index, or reports a confidence outside 0..1.
        /// </returns>
        private LlmMatchResult? ParseLlmResponse(string? content, List<ScoredCandidate> candidates)
        {
            if (string.IsNullOrWhiteSpace(content))
            {
                return null;
            }

            // Models frequently wrap the JSON in code fences or prose; keep only the outermost object.
            var start = content.IndexOf('{');
            var end = content.LastIndexOf('}');
            if (start < 0 || end <= start)
            {
                _logger.LogWarning("LLM response did not contain a JSON object: {Content}", content);
                return null;
            }

            try
            {
                var json = JsonSerializer.Deserialize<JsonElement>(content.Substring(start, end - start + 1));
                var matchIndex = json.GetProperty("match_index").GetInt32();
                var confidence = json.GetProperty("confidence").GetDouble();

                if (matchIndex <= 0 || matchIndex > candidates.Count)
                {
                    return null;
                }

                // The prompt asks for 0.0 to 1.0; anything else (e.g. a percentage like 85) is not
                // trustworthy enough to drive an automatic mapping.
                if (confidence is < 0.0 or > 1.0)
                {
                    _logger.LogWarning("LLM returned out-of-range confidence {Confidence}", confidence);
                    return null;
                }

                return new LlmMatchResult
                {
                    TransactionId = candidates[matchIndex - 1].Transaction.Id,
                    Confidence = confidence
                };
            }
            catch (Exception ex) when (ex is JsonException
                                           or KeyNotFoundException
                                           or InvalidOperationException
                                           or FormatException)
            {
                _logger.LogWarning(ex, "Failed to parse LLM response: {Content}", content);
                return null;
            }
        }
    }

    /// <summary>A candidate transaction together with its match score.</summary>
    public class ScoredCandidate
    {
        public required Transaction Transaction { get; set; }
        public double Score { get; set; }
    }

    /// <summary>The transaction chosen by the LLM and the confidence it reported.</summary>
    public class LlmMatchResult
    {
        public long TransactionId { get; set; }
        public double Confidence { get; set; }
    }

    /// <summary>Outcome of an attempt to auto-map one receipt.</summary>
    public class ReceiptAutoMapResult
    {
        public AutoMapStatus Status { get; init; }
        public long? TransactionId { get; init; }
        public List<Transaction> MultipleMatches { get; init; } = new();
        public string? Message { get; init; }

        public static ReceiptAutoMapResult Success(long transactionId) => new()
        {
            Status = AutoMapStatus.Success,
            TransactionId = transactionId
        };

        public static ReceiptAutoMapResult AlreadyMapped(long transactionId) => new()
        {
            Status = AutoMapStatus.AlreadyMapped,
            TransactionId = transactionId
        };

        public static ReceiptAutoMapResult NoMatch() => new()
        {
            Status = AutoMapStatus.NoMatch,
            Message = "No matching transaction found."
        };

        public static ReceiptAutoMapResult WithMultipleMatches(List<Transaction> matches) => new()
        {
            Status = AutoMapStatus.MultipleMatches,
            MultipleMatches = matches,
            Message = $"Found {matches.Count} potential matches."
        };

        public static ReceiptAutoMapResult NotParsed() => new()
        {
            Status = AutoMapStatus.NotParsed,
            Message = "Receipt has not been parsed yet."
        };

        public static ReceiptAutoMapResult Failure(string message) => new()
        {
            Status = AutoMapStatus.Failed,
            Message = message
        };
    }

    /// <summary>
    /// Counters for a bulk auto-map run. Only Success, NoMatch and MultipleMatches outcomes have
    /// their own counter; every processed receipt is included in <see cref="TotalProcessed"/>.
    /// </summary>
    public class BulkAutoMapResult
    {
        public int TotalProcessed { get; set; }
        public int MappedCount { get; set; }
        public int NoMatchCount { get; set; }
        public int MultipleMatchesCount { get; set; }
    }

    /// <summary>Possible outcomes of an auto-map attempt.</summary>
    public enum AutoMapStatus
    {
        Success,
        AlreadyMapped,
        NoMatch,
        MultipleMatches,
        NotParsed,
        Failed
    }
}