Filter duplicate check by date range for better performance
Optimize duplicate detection so that it queries only the existing transactions that fall within the date range of the uploaded CSV file (plus a 1-day buffer on each side). This avoids loading the entire transaction history into memory when checking for duplicates. For example, uploading 2800 transactions dated Jan-Mar 2024 now loads only the existing transactions from that period rather than all historical transactions.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -255,35 +255,64 @@ namespace MoneyMap.Pages
|
||||
var previewItems = new List<TransactionPreview>();
|
||||
var addedInThisBatch = new HashSet<TransactionKey>();
|
||||
|
||||
// Load all existing transactions into memory for fast duplicate checking
|
||||
var existingTransactions = await _db.Transactions
|
||||
.Select(t => new TransactionKey(t.Date, t.Amount, t.Name, t.Memo, t.AccountId, t.CardId))
|
||||
.ToHashSetAsync();
|
||||
// First pass: read CSV to get date range and all transactions
|
||||
var csvTransactions = new List<(TransactionCsvRow Row, Transaction Transaction, TransactionKey Key)>();
|
||||
DateTime? minDate = null;
|
||||
DateTime? maxDate = null;
|
||||
|
||||
using var reader = new StreamReader(csvStream);
|
||||
using var csv = new CsvReader(reader, new CsvConfiguration(CultureInfo.InvariantCulture)
|
||||
using (var reader = new StreamReader(csvStream))
|
||||
using (var csv = new CsvReader(reader, new CsvConfiguration(CultureInfo.InvariantCulture)
|
||||
{
|
||||
HasHeaderRecord = true,
|
||||
HeaderValidated = null,
|
||||
MissingFieldFound = null
|
||||
});
|
||||
|
||||
csv.Read();
|
||||
csv.ReadHeader();
|
||||
var hasCategory = csv.HeaderRecord?.Any(h => h.Equals("Category", StringComparison.OrdinalIgnoreCase)) ?? false;
|
||||
csv.Context.RegisterClassMap(new TransactionCsvRowMap(hasCategory));
|
||||
|
||||
while (csv.Read())
|
||||
}))
|
||||
{
|
||||
var row = csv.GetRecord<TransactionCsvRow>();
|
||||
csv.Read();
|
||||
csv.ReadHeader();
|
||||
var hasCategory = csv.HeaderRecord?.Any(h => h.Equals("Category", StringComparison.OrdinalIgnoreCase)) ?? false;
|
||||
csv.Context.RegisterClassMap(new TransactionCsvRowMap(hasCategory));
|
||||
|
||||
var paymentResolution = await _cardResolver.ResolvePaymentAsync(row.Memo, context);
|
||||
if (!paymentResolution.IsSuccess)
|
||||
return PreviewOperationResult.Failure(paymentResolution.ErrorMessage!);
|
||||
while (csv.Read())
|
||||
{
|
||||
var row = csv.GetRecord<TransactionCsvRow>();
|
||||
|
||||
var transaction = MapToTransaction(row, paymentResolution);
|
||||
var key = new TransactionKey(transaction);
|
||||
var paymentResolution = await _cardResolver.ResolvePaymentAsync(row.Memo, context);
|
||||
if (!paymentResolution.IsSuccess)
|
||||
return PreviewOperationResult.Failure(paymentResolution.ErrorMessage!);
|
||||
|
||||
var transaction = MapToTransaction(row, paymentResolution);
|
||||
var key = new TransactionKey(transaction);
|
||||
|
||||
csvTransactions.Add((row, transaction, key));
|
||||
|
||||
// Track date range
|
||||
if (minDate == null || transaction.Date < minDate) minDate = transaction.Date;
|
||||
if (maxDate == null || transaction.Date > maxDate) maxDate = transaction.Date;
|
||||
}
|
||||
}
|
||||
|
||||
// Load existing transactions within the date range for fast duplicate checking
|
||||
HashSet<TransactionKey> existingTransactions;
|
||||
if (minDate.HasValue && maxDate.HasValue)
|
||||
{
|
||||
// Add a buffer of 1 day on each side to catch any edge cases
|
||||
var startDate = minDate.Value.AddDays(-1);
|
||||
var endDate = maxDate.Value.AddDays(1);
|
||||
|
||||
existingTransactions = await _db.Transactions
|
||||
.Where(t => t.Date >= startDate && t.Date <= endDate)
|
||||
.Select(t => new TransactionKey(t.Date, t.Amount, t.Name, t.Memo, t.AccountId, t.CardId))
|
||||
.ToHashSetAsync();
|
||||
}
|
||||
else
|
||||
{
|
||||
existingTransactions = new HashSet<TransactionKey>();
|
||||
}
|
||||
|
||||
// Second pass: check for duplicates and build preview
|
||||
foreach (var (row, transaction, key) in csvTransactions)
|
||||
{
|
||||
// Fast in-memory duplicate checking
|
||||
bool isDuplicate = addedInThisBatch.Contains(key) || existingTransactions.Contains(key);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user