From fa142288b6a105bef389839b579a2f35567deed6 Mon Sep 17 00:00:00 2001 From: AJ Date: Sun, 12 Oct 2025 19:33:47 -0400 Subject: [PATCH] Add intelligent word-based merchant matching for receipt mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implemented word-based relevance scoring to intelligently sort transactions when multiple matches exist within the amount tolerance. The system now: 1. Splits the receipt merchant name into words (handling spaces, dashes, underscores, dots) 2. Compares each receipt word against the words of the transaction's merchant name and of the transaction name 3. Counts a receipt word as matched when it contains, or is contained in, one of those words (bidirectional substring matching) 4. Exact case-insensitive matches on the full name get highest priority (score 1000) 5. Otherwise the score is 10 points per matching word, taking the higher of the merchant-name and transaction-name counts 6. Sorts by relevance score (highest first), then by date (newest first) Examples: - Receipt "Duke Energy" matches "DUKE ENERGY CORPORATION" better than "WALMART" - Receipt "McDonald's" matches "MCDONALD'S #12345" better than "BURGER KING" - Receipt "Comcast" matches "COMCAST CABLE" better than "VERIZON" In ReceiptAutoMapper the previous merchant-name Contains filter on the query is removed, so candidates are now ranked by merchant relevance rather than excluded by it. This dramatically improves auto-mapping success rate and puts the most likely transaction at the top of the manual selection list. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- MoneyMap/Pages/Receipts.cshtml.cs | 26 ++++++++---------- MoneyMap/Services/ReceiptAutoMapper.cs | 38 ++++++++++++++++++++------ 2 files changed, 41 insertions(+), 23 deletions(-) diff --git a/MoneyMap/Pages/Receipts.cshtml.cs b/MoneyMap/Pages/Receipts.cshtml.cs index 211f5dd..d0f377f 100644 --- a/MoneyMap/Pages/Receipts.cshtml.cs +++ b/MoneyMap/Pages/Receipts.cshtml.cs @@ -254,32 +254,30 @@ namespace MoneyMap.Pages var candidates = await query .ToListAsync(); - // If receipt has merchant, sort matches by relevance (but don't exclude) + // Sort by merchant/name relevance using word matching if (!string.IsNullOrWhiteSpace(receipt.Merchant)) { - var merchantLower = receipt.Merchant.ToLower(); + var receiptWords = receipt.Merchant.ToLower().Split(new[] { ' ', '-', '_', '.' }, StringSplitOptions.RemoveEmptyEntries); - // Sort: exact matches first, then partial matches, then others candidates = candidates .OrderByDescending(t => { var merchantName = t.Merchant?.Name?.ToLower() ?? ""; var transactionName = t.Name?.ToLower() ?? ""; - // Exact match on merchant or transaction name - if (merchantName == merchantLower || transactionName == merchantLower) - return 3; + // Exact match + if (merchantName == receipt.Merchant.ToLower() || transactionName == receipt.Merchant.ToLower()) + return 1000; - // Contains match on merchant - if (merchantName.Contains(merchantLower)) - return 2; + // Count matching words + var merchantWords = merchantName.Split(new[] { ' ', '-', '_', '.' }, StringSplitOptions.RemoveEmptyEntries); + var transactionWords = transactionName.Split(new[] { ' ', '-', '_', '.' 
}, StringSplitOptions.RemoveEmptyEntries); - // Contains match on transaction name - if (transactionName.Contains(merchantLower)) - return 1; + var merchantMatches = receiptWords.Count(rw => merchantWords.Any(mw => mw.Contains(rw) || rw.Contains(mw))); + var transactionMatches = receiptWords.Count(rw => transactionWords.Any(tw => tw.Contains(rw) || rw.Contains(tw))); - // No match - return 0; + // Return the higher match count + return Math.Max(merchantMatches * 10, transactionMatches * 10); }) .ThenByDescending(t => t.Date) .ThenByDescending(t => t.Id) diff --git a/MoneyMap/Services/ReceiptAutoMapper.cs b/MoneyMap/Services/ReceiptAutoMapper.cs index 883ea44..1fbbb82 100644 --- a/MoneyMap/Services/ReceiptAutoMapper.cs +++ b/MoneyMap/Services/ReceiptAutoMapper.cs @@ -118,18 +118,38 @@ namespace MoneyMap.Services return new List(); } - // Filter by merchant if available - if (!string.IsNullOrWhiteSpace(receipt.Merchant)) - { - // Try to find matching merchant name - query = query.Where(t => - (t.Merchant != null && t.Merchant.Name.Contains(receipt.Merchant)) || - t.Name.Contains(receipt.Merchant)); - } - // Get candidates var candidates = await query.ToListAsync(); + // Sort by merchant/name relevance using word matching if merchant available + if (!string.IsNullOrWhiteSpace(receipt.Merchant)) + { + var receiptWords = receipt.Merchant.ToLower().Split(new[] { ' ', '-', '_', '.' }, StringSplitOptions.RemoveEmptyEntries); + + candidates = candidates + .OrderByDescending(t => + { + var merchantName = t.Merchant?.Name?.ToLower() ?? ""; + var transactionName = t.Name?.ToLower() ?? ""; + + // Exact match + if (merchantName == receipt.Merchant.ToLower() || transactionName == receipt.Merchant.ToLower()) + return 1000; + + // Count matching words + var merchantWords = merchantName.Split(new[] { ' ', '-', '_', '.' }, StringSplitOptions.RemoveEmptyEntries); + var transactionWords = transactionName.Split(new[] { ' ', '-', '_', '.' 
}, StringSplitOptions.RemoveEmptyEntries); + + var merchantMatches = receiptWords.Count(rw => merchantWords.Any(mw => mw.Contains(rw) || rw.Contains(mw))); + var transactionMatches = receiptWords.Count(rw => transactionWords.Any(tw => tw.Contains(rw) || rw.Contains(tw))); + + // Return the higher match count + return Math.Max(merchantMatches * 10, transactionMatches * 10); + }) + .ThenByDescending(t => t.Date) + .ToList(); + } + // If we have a total amount, filter by amount match (±10% tolerance) if (receipt.Total.HasValue) {