3b01efd8a6
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
496 lines
19 KiB
C#
496 lines
19 KiB
C#
using Microsoft.EntityFrameworkCore;
|
|
using MoneyMap.Data;
|
|
using MoneyMap.Models;
|
|
using System.Text;
|
|
using System.Text.Json;
|
|
|
|
namespace MoneyMap.Services
|
|
{
|
|
/// <summary>
/// Links parsed receipts to bank transactions, either one receipt at a time
/// or in bulk, and exposes the scored candidate list for review.
/// </summary>
public interface IReceiptAutoMapper
{
    /// <summary>Attempts to map a single receipt to a transaction.</summary>
    /// <param name="receiptId">Identifier of the receipt to map.</param>
    /// <returns>The outcome of the mapping attempt.</returns>
    Task<ReceiptAutoMapResult> AutoMapReceiptAsync(long receiptId);

    /// <summary>Runs the auto-mapping over receipts that are not yet linked to a transaction.</summary>
    /// <returns>Aggregate counters describing the run.</returns>
    Task<BulkAutoMapResult> AutoMapUnmappedReceiptsAsync();

    /// <summary>Returns the scored candidate transactions for a receipt without mapping anything.</summary>
    /// <param name="receiptId">Identifier of the receipt to score candidates for.</param>
    /// <returns>Candidates ordered by descending score; empty when the receipt does not exist.</returns>
    Task<List<ScoredCandidate>> GetScoredCandidatesAsync(long receiptId);
}
|
|
|
|
public class ReceiptAutoMapper : IReceiptAutoMapper
|
|
{
|
|
// Collaborators supplied through the constructor.
private readonly MoneyMapContext _db;
private readonly IReceiptManager _receiptManager;
private readonly LlamaCppVisionClient _llmClient;
private readonly ILogger<ReceiptAutoMapper> _logger;

// Confidence thresholds
private const double AutoMapThreshold = 0.85;   // Auto-map if score >= 85%
private const double LlmReviewThreshold = 0.50; // Use LLM if score between 50-85%

/// <summary>
/// Creates the mapper and stores its collaborators.
/// </summary>
/// <param name="db">Database context used to read receipts and transactions.</param>
/// <param name="receiptManager">Service that performs the actual receipt-to-transaction link.</param>
/// <param name="llmClient">LLM client consulted for medium-confidence matches.</param>
/// <param name="logger">Logger for mapping decisions and failures.</param>
public ReceiptAutoMapper(
    MoneyMapContext db,
    IReceiptManager receiptManager,
    LlamaCppVisionClient llmClient,
    ILogger<ReceiptAutoMapper> logger)
{
    (_db, _receiptManager, _llmClient, _logger) = (db, receiptManager, llmClient, logger);
}
|
|
|
|
/// <summary>
/// Tries to link one receipt to a transaction.
/// </summary>
/// <remarks>
/// Decision ladder, based on the best candidate's score:
/// at or above <c>AutoMapThreshold</c> the receipt is mapped directly;
/// at or above <c>LlmReviewThreshold</c> the top five candidates are sent to the LLM
/// and its pick is applied only when it reports a confidence of at least 0.7,
/// otherwise those candidates are returned for manual review;
/// below that, up to five candidates are returned when there is more than one,
/// and a single weak candidate is reported as no match.
/// </remarks>
/// <param name="receiptId">Identifier of the receipt to map.</param>
/// <returns>The outcome of the attempt.</returns>
public async Task<ReceiptAutoMapResult> AutoMapReceiptAsync(long receiptId)
{
    // Minimum confidence the LLM must report before its pick is applied.
    const double llmAcceptConfidence = 0.7;
    // Number of candidates shown to the LLM / returned for manual review.
    const int shortlistSize = 5;

    var receipt = await _db.Receipts
        .Include(r => r.Transaction)
        .FirstOrDefaultAsync(r => r.Id == receiptId);

    if (receipt == null)
    {
        return ReceiptAutoMapResult.Failure("Receipt not found.");
    }

    if (receipt.TransactionId.HasValue)
    {
        return ReceiptAutoMapResult.AlreadyMapped(receipt.TransactionId.Value);
    }

    // A receipt with no merchant, no date and no total has nothing to match on.
    var hasParsedData = !string.IsNullOrWhiteSpace(receipt.Merchant)
        || receipt.ReceiptDate.HasValue
        || receipt.Total.HasValue;
    if (!hasParsedData)
    {
        return ReceiptAutoMapResult.NotParsed();
    }

    var ranked = await FindAndScoreCandidatesAsync(receipt);
    if (ranked.Count == 0)
    {
        return ReceiptAutoMapResult.NoMatch();
    }

    // Shared tail of both mapping paths: persist the link and translate the outcome.
    async Task<ReceiptAutoMapResult> ApplyMappingAsync(long transactionId)
    {
        var mapped = await _receiptManager.MapReceiptToTransactionAsync(receiptId, transactionId);
        if (mapped)
        {
            return ReceiptAutoMapResult.Success(transactionId);
        }

        return ReceiptAutoMapResult.Failure("Failed to map receipt to transaction.");
    }

    var best = ranked[0];

    // High confidence - auto-map directly
    if (best.Score >= AutoMapThreshold)
    {
        _logger.LogInformation(
            "Auto-mapping receipt {ReceiptId} to transaction {TransactionId} with score {Score:P0}",
            receiptId, best.Transaction.Id, best.Score);

        return await ApplyMappingAsync(best.Transaction.Id);
    }

    // Low confidence - no good matches
    if (best.Score < LlmReviewThreshold)
    {
        return ranked.Count > 1
            ? ReceiptAutoMapResult.WithMultipleMatches(
                ranked.Take(shortlistSize).Select(c => c.Transaction).ToList())
            : ReceiptAutoMapResult.NoMatch();
    }

    // Medium confidence - use LLM to decide
    var shortlist = ranked.Take(shortlistSize).ToList();
    var verdict = await GetLlmMatchDecisionAsync(receipt, shortlist);

    if (verdict != null && verdict.Confidence >= llmAcceptConfidence)
    {
        _logger.LogInformation(
            "LLM matched receipt {ReceiptId} to transaction {TransactionId} with confidence {Confidence:P0}",
            receiptId, verdict.TransactionId, verdict.Confidence);

        return await ApplyMappingAsync(verdict.TransactionId);
    }

    // LLM uncertain - return multiple matches for manual review
    return ReceiptAutoMapResult.WithMultipleMatches(
        shortlist.Select(c => c.Transaction).ToList());
}
|
|
|
|
/// <summary>
/// Runs <see cref="AutoMapReceiptAsync"/> over every receipt that has no transaction
/// and at least one of merchant, receipt date or total populated.
/// </summary>
/// <returns>
/// Counters for the run. Only the Success, MultipleMatches and NoMatch outcomes have
/// their own counter; any other outcome is reflected in <c>TotalProcessed</c> only.
/// </returns>
public async Task<BulkAutoMapResult> AutoMapUnmappedReceiptsAsync()
{
    var pending = await _db.Receipts
        .Where(r => r.TransactionId == null)
        .Where(r => r.Merchant != null || r.ReceiptDate != null || r.Total != null)
        .ToListAsync();

    var summary = new BulkAutoMapResult { TotalProcessed = pending.Count };

    // Receipts are processed sequentially, one awaited call at a time.
    foreach (var pendingReceipt in pending)
    {
        var outcome = await AutoMapReceiptAsync(pendingReceipt.Id);

        switch (outcome.Status)
        {
            case AutoMapStatus.Success:
                summary.MappedCount++;
                break;
            case AutoMapStatus.MultipleMatches:
                summary.MultipleMatchesCount++;
                break;
            case AutoMapStatus.NoMatch:
                summary.NoMatchCount++;
                break;
        }
    }

    return summary;
}
|
|
|
|
/// <summary>
/// Loads a receipt and returns its scored candidate transactions without mapping anything.
/// </summary>
/// <param name="receiptId">Identifier of the receipt to score candidates for.</param>
/// <returns>Candidates ordered by descending score; an empty list when the receipt does not exist.</returns>
public async Task<List<ScoredCandidate>> GetScoredCandidatesAsync(long receiptId)
{
    var found = await _db.Receipts.FirstOrDefaultAsync(r => r.Id == receiptId);

    if (found != null)
    {
        return await FindAndScoreCandidatesAsync(found);
    }

    return new List<ScoredCandidate>();
}
|
|
|
|
/// <summary>
/// Finds transactions inside the receipt's date window that are not already linked
/// to another receipt, scores each one, and returns them best-first.
/// </summary>
/// <param name="receipt">The receipt to find candidates for.</param>
/// <returns>
/// Candidates with a score above 0.1, ordered by descending score. Empty when the
/// receipt has neither a receipt date nor a due date, or when nothing qualifies.
/// </returns>
private async Task<List<ScoredCandidate>> FindAndScoreCandidatesAsync(Receipt receipt)
{
    // Date range: use receipt date or due date
    DateTime? targetDate = receipt.ReceiptDate;
    DateTime? dueDate = receipt.DueDate;

    // No date info - can't match reliably
    if (!targetDate.HasValue && !dueDate.HasValue)
    {
        return new List<ScoredCandidate>();
    }

    // Min date is the receipt date - transactions can't precede the receipt
    // (you get a receipt when you buy something). When only a due date is known,
    // the window starts at the due date instead.
    var minDate = targetDate ?? dueDate!.Value;
    // The window closes 7 days after the due date, or after the receipt date when there is no due date.
    var maxDate = (dueDate ?? targetDate!.Value).AddDays(7);

    var candidates = await _db.Transactions
        .Include(t => t.Card)
        .Include(t => t.Account)
        .Include(t => t.Merchant)
        .Where(t => t.Date >= minDate && t.Date <= maxDate)
        .ToListAsync();

    // Nothing in the window: skip the second query entirely.
    if (candidates.Count == 0)
    {
        return new List<ScoredCandidate>();
    }

    // Exclude transactions that already have receipts (other than this one).
    // A hash set keeps the per-candidate membership test O(1) instead of a list scan.
    var claimedTransactionIds = (await _db.Receipts
            .Where(r => r.TransactionId != null && r.Id != receipt.Id)
            .Select(r => r.TransactionId!.Value)
            .Distinct()
            .ToListAsync())
        .ToHashSet();

    // Score each candidate
    return candidates
        .Where(t => !claimedTransactionIds.Contains(t.Id))
        .Select(t => new ScoredCandidate
        {
            Transaction = t,
            Score = CalculateMatchScore(receipt, t)
        })
        .Where(s => s.Score > 0.1) // Filter out very poor matches
        .OrderByDescending(s => s.Score)
        .ToList();
}
|
|
|
|
/// <summary>
/// Scores how well a transaction matches a receipt on a 0..1 scale.
/// </summary>
/// <remarks>
/// Each signal present on the receipt contributes its weight to the attainable total:
/// amount 0.40, receipt date 0.25, due date 0.10, merchant 0.35. When the attainable
/// total is below 1.0 the earned score is divided by it; the result is capped at 1.0.
/// </remarks>
/// <param name="receipt">The receipt being matched.</param>
/// <param name="transaction">The candidate transaction.</param>
/// <returns>The match score, never greater than 1.0.</returns>
private double CalculateMatchScore(Receipt receipt, Transaction transaction)
{
    const double amountWeight = 0.40;
    const double dateWeight = 0.25;
    const double dueDateWeight = 0.10;
    const double merchantWeight = 0.35;

    double earned = 0;
    double attainable = 0;

    // Amount matching (weight: 40%) - compared on absolute values.
    if (receipt.Total.HasValue)
    {
        attainable += amountWeight;

        var receiptAmount = Math.Abs(receipt.Total.Value);
        var transactionAmount = Math.Abs(transaction.Amount);

        // A zero receipt total has no meaningful relative difference and earns nothing.
        if (receiptAmount > 0)
        {
            var relativeGap = (double)(Math.Abs(receiptAmount - transactionAmount) / receiptAmount);

            var amountTier = relativeGap switch
            {
                0d => 1.0,
                <= 0.01 => 0.95, // Within 1%
                <= 0.05 => 0.80, // Within 5%
                <= 0.10 => 0.60, // Within 10%
                <= 0.20 => 0.30, // Within 20%
                _ => 0.0         // Beyond 20% = 0 points
            };
            earned += amountWeight * amountTier;
        }
    }

    // Date matching (weight: 25%)
    if (receipt.ReceiptDate.HasValue)
    {
        attainable += dateWeight;

        var daysFromReceipt = Math.Abs((transaction.Date - receipt.ReceiptDate.Value).TotalDays);

        var dateTier = daysFromReceipt switch
        {
            0d => 1.0,
            <= 1 => 0.90,
            <= 3 => 0.70,
            <= 5 => 0.50,
            <= 7 => 0.30,
            _ => 0.0 // Beyond 7 days = 0 points
        };
        earned += dateWeight * dateTier;
    }

    // Due date matching for bills (weight: 10% bonus)
    if (receipt.DueDate.HasValue)
    {
        attainable += dueDateWeight;

        var daysFromDue = Math.Abs((transaction.Date - receipt.DueDate.Value).TotalDays);

        var dueTier = daysFromDue switch
        {
            <= 1 => 1.0,
            <= 3 => 0.70,
            <= 5 => 0.40,
            _ => 0.0
        };
        earned += dueDateWeight * dueTier;
    }

    // Merchant/Name matching (weight: 35%)
    if (!string.IsNullOrWhiteSpace(receipt.Merchant))
    {
        attainable += merchantWeight;

        earned += merchantWeight * CalculateMerchantMatchScore(
            receipt.Merchant,
            transaction.Merchant?.Name,
            transaction.Name);
    }

    // Normalize score if we didn't have all data points
    if (attainable > 0 && attainable < 1.0)
    {
        earned /= attainable;
    }

    return Math.Min(earned, 1.0);
}
|
|
|
|
/// <summary>
/// Scores how well the receipt's merchant text matches a transaction's merchant
/// name and/or transaction name. Comparison is case-insensitive and ignores
/// leading/trailing whitespace.
/// </summary>
/// <param name="receiptMerchant">Merchant text taken from the receipt.</param>
/// <param name="transactionMerchant">Name of the transaction's merchant, if any.</param>
/// <param name="transactionName">The transaction's own name/description, if any.</param>
/// <returns>
/// 1.0 for an exact match, 0.90 when the receipt text and merchant name contain one
/// another, 0.85 for the same relation with the transaction name, otherwise the better
/// of the two word-overlap ratios. 0 when the receipt text is blank.
/// </returns>
private double CalculateMerchantMatchScore(string receiptMerchant, string? transactionMerchant, string? transactionName)
{
    var receiptLower = receiptMerchant.ToLowerInvariant().Trim();
    var merchantLower = transactionMerchant?.ToLowerInvariant().Trim() ?? "";
    var nameLower = transactionName?.ToLowerInvariant().Trim() ?? "";

    // Blank receipt text carries no signal; without this guard it would "equal"
    // or "be contained in" any blank transaction field.
    if (receiptLower.Length == 0)
        return 0;

    // Exact match
    if (receiptLower == merchantLower || receiptLower == nameLower)
        return 1.0;

    // Contains match. string.Contains("") is always true, so a missing merchant or
    // name (normalised to "" above) must be excluded explicitly; otherwise every
    // transaction lacking that field would receive a near-perfect merchant score.
    if (merchantLower.Length > 0
        && (merchantLower.Contains(receiptLower) || receiptLower.Contains(merchantLower)))
        return 0.90;

    if (nameLower.Length > 0
        && (nameLower.Contains(receiptLower) || receiptLower.Contains(nameLower)))
        return 0.85;

    // Word-based matching
    var receiptWords = ExtractWords(receiptLower);
    var merchantWords = ExtractWords(merchantLower);
    var nameWords = ExtractWords(nameLower);

    var merchantMatchRatio = CalculateWordMatchRatio(receiptWords, merchantWords);
    var nameMatchRatio = CalculateWordMatchRatio(receiptWords, nameWords);

    return Math.Max(merchantMatchRatio, nameMatchRatio);
}
|
|
|
|
/// <summary>
/// Splits text into a set of distinct words, using space and common punctuation
/// (- _ . , # / \ *) as separators and dropping single-character tokens.
/// </summary>
/// <param name="text">Text to tokenise.</param>
/// <returns>The distinct tokens of length two or more.</returns>
private static HashSet<string> ExtractWords(string text)
{
    char[] separators = { ' ', '-', '_', '.', ',', '#', '/', '\\', '*' };
    var words = new HashSet<string>();

    foreach (var token in text.Split(separators, StringSplitOptions.RemoveEmptyEntries))
    {
        // Skip single chars
        if (token.Length > 1)
        {
            words.Add(token);
        }
    }

    return words;
}
|
|
|
|
/// <summary>
/// Measures word overlap between two word sets. A word in <paramref name="words1"/>
/// counts as matched when some word in <paramref name="words2"/> equals it, contains it,
/// or is contained in it.
/// </summary>
/// <param name="words1">Words whose matches are counted.</param>
/// <param name="words2">Words to match against.</param>
/// <returns>
/// Matched-word count divided by the size of the smaller set, clamped to 1.0;
/// 0 when either set is empty.
/// </returns>
private static double CalculateWordMatchRatio(HashSet<string> words1, HashSet<string> words2)
{
    if (words1.Count == 0 || words2.Count == 0)
    {
        return 0;
    }

    var matches = words1.Count(w1 =>
        words2.Any(w2 => w1 == w2 || w1.Contains(w2) || w2.Contains(w1)));

    // Return ratio of matched words from the smaller set.
    // Matches are counted over words1, which may be the larger set, so several of its
    // words can match a single word of words2 (e.g. {"wal","mart","walmart"} vs
    // {"walmart"}); clamp so the ratio never exceeds 1.0.
    var smallerCount = Math.Min(words1.Count, words2.Count);
    return Math.Min(1.0, (double)matches / smallerCount);
}
|
|
|
|
/// <summary>
/// Asks the LLM to choose the best candidate transaction for a receipt.
/// </summary>
/// <param name="receipt">The receipt being matched.</param>
/// <param name="candidates">Candidates presented to the LLM, in prompt order.</param>
/// <returns>
/// The parsed LLM decision, or <c>null</c> when the call fails, throws, or the
/// response cannot be turned into a valid pick.
/// </returns>
private async Task<LlmMatchResult?> GetLlmMatchDecisionAsync(Receipt receipt, List<ScoredCandidate> candidates)
{
    try
    {
        var prompt = BuildLlmPrompt(receipt, candidates);

        _logger.LogInformation("Sending receipt matching prompt to LLM for receipt {ReceiptId}", receipt.Id);

        var reply = await _llmClient.SendTextPromptAsync(prompt);

        if (reply.IsSuccess)
        {
            _logger.LogInformation("LLM response: {Content}", reply.Content);
            return ParseLlmResponse(reply.Content, candidates);
        }

        _logger.LogWarning("LLM matching failed: {Error}", reply.ErrorMessage);
        return null;
    }
    catch (Exception ex)
    {
        // Best effort: any failure here yields "no decision" rather than an exception.
        _logger.LogError(ex, "Error during LLM match decision");
        return null;
    }
}
|
|
|
|
/// <summary>
/// Builds the text prompt that describes the receipt and the numbered candidate
/// transactions, and asks the LLM for a JSON answer with a 1-based
/// <c>match_index</c>, a <c>confidence</c> and a <c>reason</c>.
/// </summary>
/// <param name="receipt">The receipt being matched.</param>
/// <param name="candidates">Candidates to list; their order defines the 1-based indices.</param>
/// <returns>The complete prompt text.</returns>
private static string BuildLlmPrompt(Receipt receipt, List<ScoredCandidate> candidates)
{
    var prompt = new StringBuilder();

    prompt.AppendLine("You are matching a receipt to bank transactions. Analyze and pick the best match.");
    prompt.AppendLine();

    // Receipt section; missing fields are shown as "Unknown".
    prompt.AppendLine("RECEIPT:");
    prompt.AppendLine($" Merchant: {receipt.Merchant ?? "Unknown"}");
    prompt.AppendLine($" Date: {receipt.ReceiptDate?.ToString("yyyy-MM-dd") ?? "Unknown"}");
    if (receipt.DueDate.HasValue)
    {
        prompt.AppendLine($" Due Date: {receipt.DueDate.Value:yyyy-MM-dd}");
    }
    prompt.AppendLine($" Total: {receipt.Total?.ToString("C") ?? "Unknown"}");
    prompt.AppendLine();

    // Candidate section; the displayed number is the 1-based index the LLM answers with.
    prompt.AppendLine("CANDIDATE TRANSACTIONS:");

    var position = 0;
    foreach (var candidate in candidates)
    {
        position++;
        var t = candidate.Transaction;

        prompt.AppendLine($" [{position}] ID={t.Id}");
        prompt.AppendLine($" Name: {t.Name}");
        if (t.Merchant != null)
        {
            prompt.AppendLine($" Merchant: {t.Merchant.Name}");
        }
        prompt.AppendLine($" Date: {t.Date:yyyy-MM-dd}");
        prompt.AppendLine($" Amount: {t.Amount:C}");
        prompt.AppendLine($" Current Score: {candidate.Score:P0}");
        prompt.AppendLine();
    }

    // Required response shape.
    string[] responseSpec =
    {
        "Respond with JSON only:",
        "{",
        " \"match_index\": <1-based index of best match, or 0 if none match>,",
        " \"confidence\": <0.0 to 1.0>,",
        " \"reason\": \"<brief explanation>\"",
        "}"
    };
    foreach (var specLine in responseSpec)
    {
        prompt.AppendLine(specLine);
    }

    return prompt.ToString();
}
|
|
|
|
/// <summary>
/// Parses the LLM's reply into a match decision.
/// </summary>
/// <remarks>
/// The reply is expected to contain a JSON object with an integer <c>match_index</c>
/// (1-based position in <paramref name="candidates"/>, 0 meaning "none") and a numeric
/// <c>confidence</c> between 0.0 and 1.0. Text surrounding the object, such as markdown
/// code fences or a leading sentence, is tolerated.
/// </remarks>
/// <param name="content">Raw text returned by the LLM.</param>
/// <param name="candidates">Candidates in the order they were listed in the prompt.</param>
/// <returns>
/// The chosen transaction and confidence, or <c>null</c> when the reply is blank,
/// unparsable, picks no candidate or an index out of range, or reports a confidence
/// outside 0..1.
/// </returns>
private LlmMatchResult? ParseLlmResponse(string? content, List<ScoredCandidate> candidates)
{
    if (string.IsNullOrWhiteSpace(content))
        return null;

    try
    {
        // Models frequently wrap the JSON in ``` fences or add prose despite being told
        // not to; parse only the outermost {...} span when one exists.
        var objectStart = content.IndexOf('{');
        var objectEnd = content.LastIndexOf('}');
        var payload = objectStart >= 0 && objectEnd > objectStart
            ? content.Substring(objectStart, objectEnd - objectStart + 1)
            : content;

        var json = JsonSerializer.Deserialize<JsonElement>(payload);

        var matchIndex = json.GetProperty("match_index").GetInt32();
        var confidence = json.GetProperty("confidence").GetDouble();

        // 0 means "no match"; anything beyond the list is not a valid pick.
        if (matchIndex <= 0 || matchIndex > candidates.Count)
            return null;

        // The prompt asks for 0.0..1.0. A value outside that range (e.g. 85 meant as a
        // percentage) is ambiguous and must not satisfy a caller's ">= threshold" check.
        if (!(confidence >= 0.0 && confidence <= 1.0))
        {
            _logger.LogWarning("LLM returned out-of-range confidence {Confidence}; ignoring its match", confidence);
            return null;
        }

        return new LlmMatchResult
        {
            TransactionId = candidates[matchIndex - 1].Transaction.Id,
            Confidence = confidence
        };
    }
    catch (Exception ex)
    {
        // Malformed JSON, missing properties and wrong value kinds all end up here.
        _logger.LogWarning(ex, "Failed to parse LLM response: {Content}", content);
        return null;
    }
}
|
|
}
|
|
|
|
/// <summary>
/// A candidate transaction paired with its match score for a given receipt.
/// </summary>
public class ScoredCandidate
{
    /// <summary>The candidate transaction.</summary>
    public required Transaction Transaction { get; set; }

    /// <summary>Match score assigned to the candidate.</summary>
    public double Score { get; set; }
}
|
|
|
|
/// <summary>
/// The LLM's decision: which transaction it picked and how confident it reported being.
/// </summary>
public class LlmMatchResult
{
    /// <summary>Identifier of the transaction the LLM selected.</summary>
    public long TransactionId { get; set; }

    /// <summary>Confidence value reported by the LLM.</summary>
    public double Confidence { get; set; }
}
|
|
|
|
/// <summary>
/// Outcome of an attempt to map one receipt to a transaction. Instances are created
/// through the static factory methods, one per <see cref="AutoMapStatus"/> value.
/// </summary>
public class ReceiptAutoMapResult
{
    /// <summary>Which outcome this result represents.</summary>
    public AutoMapStatus Status { get; init; }

    /// <summary>The mapped transaction; set for Success and AlreadyMapped results.</summary>
    public long? TransactionId { get; init; }

    /// <summary>Candidate transactions for manual review; empty unless Status is MultipleMatches.</summary>
    public List<Transaction> MultipleMatches { get; init; } = new();

    /// <summary>Human-readable explanation, when one applies.</summary>
    public string? Message { get; init; }

    /// <summary>The receipt was mapped to <paramref name="transactionId"/>.</summary>
    public static ReceiptAutoMapResult Success(long transactionId)
    {
        return new ReceiptAutoMapResult
        {
            Status = AutoMapStatus.Success,
            TransactionId = transactionId
        };
    }

    /// <summary>The receipt was already linked to <paramref name="transactionId"/>.</summary>
    public static ReceiptAutoMapResult AlreadyMapped(long transactionId)
    {
        return new ReceiptAutoMapResult
        {
            Status = AutoMapStatus.AlreadyMapped,
            TransactionId = transactionId
        };
    }

    /// <summary>No suitable transaction was found.</summary>
    public static ReceiptAutoMapResult NoMatch()
    {
        return new ReceiptAutoMapResult
        {
            Status = AutoMapStatus.NoMatch,
            Message = "No matching transaction found."
        };
    }

    /// <summary>Several plausible transactions were found; the caller should choose.</summary>
    /// <param name="matches">The candidate transactions to present.</param>
    public static ReceiptAutoMapResult WithMultipleMatches(List<Transaction> matches)
    {
        return new ReceiptAutoMapResult
        {
            Status = AutoMapStatus.MultipleMatches,
            MultipleMatches = matches,
            Message = $"Found {matches.Count} potential matches."
        };
    }

    /// <summary>The receipt has no parsed data to match on.</summary>
    public static ReceiptAutoMapResult NotParsed()
    {
        return new ReceiptAutoMapResult
        {
            Status = AutoMapStatus.NotParsed,
            Message = "Receipt has not been parsed yet."
        };
    }

    /// <summary>The attempt failed for the reason given in <paramref name="message"/>.</summary>
    public static ReceiptAutoMapResult Failure(string message)
    {
        return new ReceiptAutoMapResult
        {
            Status = AutoMapStatus.Failed,
            Message = message
        };
    }
}
|
|
|
|
/// <summary>
/// Counters summarising a bulk auto-mapping run.
/// </summary>
public class BulkAutoMapResult
{
    /// <summary>Number of receipts the run attempted.</summary>
    public int TotalProcessed { get; set; }

    /// <summary>Number of receipts that were mapped to a transaction.</summary>
    public int MappedCount { get; set; }

    /// <summary>Number of receipts for which no matching transaction was found.</summary>
    public int NoMatchCount { get; set; }

    /// <summary>Number of receipts left with several candidates for manual review.</summary>
    public int MultipleMatchesCount { get; set; }
}
|
|
|
|
/// <summary>
/// Possible outcomes of an auto-mapping attempt.
/// </summary>
public enum AutoMapStatus
{
    /// <summary>The receipt was mapped to a transaction.</summary>
    Success = 0,

    /// <summary>The receipt was already linked to a transaction.</summary>
    AlreadyMapped = 1,

    /// <summary>No matching transaction was found.</summary>
    NoMatch = 2,

    /// <summary>Several candidate transactions were found; manual choice needed.</summary>
    MultipleMatches = 3,

    /// <summary>The receipt has not been parsed yet.</summary>
    NotParsed = 4,

    /// <summary>The attempt failed.</summary>
    Failed = 5
}
|
|
}
|