From 3153bd50aa440468449959afc8b0438e03c5a275 Mon Sep 17 00:00:00 2001
From: AJ
Date: Sat, 11 Oct 2025 22:22:02 -0400
Subject: [PATCH] Filter duplicate check by date range for better performance
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Optimize duplicate detection to only query existing transactions within
the date range of the uploaded CSV file (plus 1-day buffer). This
prevents loading the entire transaction history into memory when
checking duplicates.

For example, uploading 2800 transactions from Jan-Mar 2024 will now
only load existing transactions from that period rather than all
historical transactions.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 MoneyMap/Pages/Upload.cshtml.cs | 69 +++++++++++++++++++++++----------
 1 file changed, 49 insertions(+), 20 deletions(-)

diff --git a/MoneyMap/Pages/Upload.cshtml.cs b/MoneyMap/Pages/Upload.cshtml.cs
index 1dc8858..6972743 100644
--- a/MoneyMap/Pages/Upload.cshtml.cs
+++ b/MoneyMap/Pages/Upload.cshtml.cs
@@ -255,35 +255,64 @@ namespace MoneyMap.Pages
             var previewItems = new List();
             var addedInThisBatch = new HashSet<TransactionKey>();
 
-            // Load all existing transactions into memory for fast duplicate checking
-            var existingTransactions = await _db.Transactions
-                .Select(t => new TransactionKey(t.Date, t.Amount, t.Name, t.Memo, t.AccountId, t.CardId))
-                .ToHashSetAsync();
+            // First pass: read CSV to get date range and all transactions
+            var csvTransactions = new List<(TransactionCsvRow Row, Transaction Transaction, TransactionKey Key)>();
+            DateTime? minDate = null;
+            DateTime? maxDate = null;
 
-            using var reader = new StreamReader(csvStream);
-            using var csv = new CsvReader(reader, new CsvConfiguration(CultureInfo.InvariantCulture)
+            using (var reader = new StreamReader(csvStream))
+            using (var csv = new CsvReader(reader, new CsvConfiguration(CultureInfo.InvariantCulture)
             {
                 HasHeaderRecord = true,
                 HeaderValidated = null,
                 MissingFieldFound = null
-            });
-
-            csv.Read();
-            csv.ReadHeader();
-            var hasCategory = csv.HeaderRecord?.Any(h => h.Equals("Category", StringComparison.OrdinalIgnoreCase)) ?? false;
-            csv.Context.RegisterClassMap(new TransactionCsvRowMap(hasCategory));
-
-            while (csv.Read())
+            }))
             {
-                var row = csv.GetRecord<TransactionCsvRow>();
+                csv.Read();
+                csv.ReadHeader();
+                var hasCategory = csv.HeaderRecord?.Any(h => h.Equals("Category", StringComparison.OrdinalIgnoreCase)) ?? false;
+                csv.Context.RegisterClassMap(new TransactionCsvRowMap(hasCategory));
 
-                var paymentResolution = await _cardResolver.ResolvePaymentAsync(row.Memo, context);
-                if (!paymentResolution.IsSuccess)
-                    return PreviewOperationResult.Failure(paymentResolution.ErrorMessage!);
+                while (csv.Read())
+                {
+                    var row = csv.GetRecord<TransactionCsvRow>();
 
-                var transaction = MapToTransaction(row, paymentResolution);
-                var key = new TransactionKey(transaction);
+                    var paymentResolution = await _cardResolver.ResolvePaymentAsync(row.Memo, context);
+                    if (!paymentResolution.IsSuccess)
+                        return PreviewOperationResult.Failure(paymentResolution.ErrorMessage!);
+                    var transaction = MapToTransaction(row, paymentResolution);
+                    var key = new TransactionKey(transaction);
+
+                    csvTransactions.Add((row, transaction, key));
+
+                    // Track date range
+                    if (minDate == null || transaction.Date < minDate) minDate = transaction.Date;
+                    if (maxDate == null || transaction.Date > maxDate) maxDate = transaction.Date;
+                }
+            }
+
+            // Load existing transactions within the date range for fast duplicate checking
+            HashSet<TransactionKey> existingTransactions;
+            if (minDate.HasValue && maxDate.HasValue)
+            {
+                // Add a buffer of 1 day on each side to catch any edge cases
+                var startDate = minDate.Value.AddDays(-1);
+                var endDate = maxDate.Value.AddDays(1);
+
+                existingTransactions = await _db.Transactions
+                    .Where(t => t.Date >= startDate && t.Date <= endDate)
+                    .Select(t => new TransactionKey(t.Date, t.Amount, t.Name, t.Memo, t.AccountId, t.CardId))
+                    .ToHashSetAsync();
+            }
+            else
+            {
+                existingTransactions = new HashSet<TransactionKey>();
+            }
+
+            // Second pass: check for duplicates and build preview
+            foreach (var (row, transaction, key) in csvTransactions)
+            {
                 // Fast in-memory duplicate checking
                 bool isDuplicate = addedInThisBatch.Contains(key) || existingTransactions.Contains(key);
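
Note, not part of the patch itself: the HashSet-based duplicate check above
depends on TransactionKey having value-equality semantics, and its definition
is not shown in this diff. The sketch below is a minimal in-memory restatement
of the two-pass approach under that assumption (a positional record supplies
the Equals/GetHashCode that Contains and Add need). The names TransactionKey,
the record's fields, and DateRangeDuplicateCheck.FilterNew are illustrative;
in the patch the equivalent Where clause is translated to SQL by EF Core
rather than run in memory.

using System;
using System.Collections.Generic;
using System.Linq;

// Assumed shape only: TransactionKey is not shown in this diff. A positional
// record provides the value-based Equals/GetHashCode that the HashSet
// membership checks rely on.
public record TransactionKey(DateTime Date, decimal Amount, string Name,
                             string? Memo, int? AccountId, int? CardId);

public static class DateRangeDuplicateCheck
{
    // Two-pass duplicate filtering in the patch's shape: derive the upload's
    // date range (plus a 1-day buffer per side), restrict the "existing" set
    // to that range, then drop rows that match the existing set or repeat
    // within the same batch.
    public static List<TransactionKey> FilterNew(
        IReadOnlyList<TransactionKey> csvKeys,
        IEnumerable<TransactionKey> allExistingKeys)
    {
        if (csvKeys.Count == 0)
            return new List<TransactionKey>();

        // First pass: the upload's date range, buffered by one day per side.
        var startDate = csvKeys.Min(k => k.Date).AddDays(-1);
        var endDate = csvKeys.Max(k => k.Date).AddDays(1);

        // Only existing keys inside the range can collide with the upload;
        // everything outside it is never loaded.
        var existing = allExistingKeys
            .Where(k => k.Date >= startDate && k.Date <= endDate)
            .ToHashSet();

        // Second pass: keep rows that are new to both the existing set and
        // the current batch. HashSet.Add returns false for repeats.
        var result = new List<TransactionKey>();
        var addedInThisBatch = new HashSet<TransactionKey>();
        foreach (var key in csvKeys)
        {
            if (!existing.Contains(key) && addedInThisBatch.Add(key))
                result.Add(key);
        }
        return result;
    }
}

If Date carries a time component on one side of the comparison but is a bare
date on the other, the 1-day buffer keeps boundary rows inside the queried
range, which appears to be the kind of edge case the commit message alludes to.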