Filter duplicate check by date range for better performance
Optimize duplicate detection to only query existing transactions within the date range of the uploaded CSV file (plus a 1-day buffer on each side). This prevents loading the entire transaction history into memory when checking for duplicates. For example, uploading 2,800 transactions from Jan-Mar 2024 now loads only the existing transactions from that period rather than all historical transactions.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
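In outline, the fix replaces an unbounded query (materializing a key for every row in the Transactions table) with one bounded to the CSV's date window. A minimal sketch of that lookup, assuming an EF Core context: `_db`, `TransactionKey`, and the query shape are taken from the diff below, while the standalone method wrapper is illustrative only.

    // Sketch only: the wrapper method is hypothetical; the query mirrors the diff.
    // Requires `using Microsoft.EntityFrameworkCore;` for ToHashSetAsync.
    private async Task<HashSet<TransactionKey>> LoadExistingKeysAsync(DateTime? minDate, DateTime? maxDate)
    {
        // No parsable rows in the CSV: nothing to compare against.
        if (!minDate.HasValue || !maxDate.HasValue)
            return new HashSet<TransactionKey>();

        // 1-day buffer on each side to catch edge cases at the range boundaries.
        var startDate = minDate.Value.AddDays(-1);
        var endDate = maxDate.Value.AddDays(1);

        // The Where clause is translated to SQL, so only rows inside the
        // window are materialized and hashed for O(1) Contains checks.
        return await _db.Transactions
            .Where(t => t.Date >= startDate && t.Date <= endDate)
            .Select(t => new TransactionKey(t.Date, t.Amount, t.Name, t.Memo, t.AccountId, t.CardId))
            .ToHashSetAsync();
    }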
@@ -255,19 +255,19 @@ namespace MoneyMap.Pages
             var previewItems = new List<TransactionPreview>();
             var addedInThisBatch = new HashSet<TransactionKey>();

-            // Load all existing transactions into memory for fast duplicate checking
-            var existingTransactions = await _db.Transactions
-                .Select(t => new TransactionKey(t.Date, t.Amount, t.Name, t.Memo, t.AccountId, t.CardId))
-                .ToHashSetAsync();
+            // First pass: read CSV to get date range and all transactions
+            var csvTransactions = new List<(TransactionCsvRow Row, Transaction Transaction, TransactionKey Key)>();
+            DateTime? minDate = null;
+            DateTime? maxDate = null;

-            using var reader = new StreamReader(csvStream);
-            using var csv = new CsvReader(reader, new CsvConfiguration(CultureInfo.InvariantCulture)
+            using (var reader = new StreamReader(csvStream))
+            using (var csv = new CsvReader(reader, new CsvConfiguration(CultureInfo.InvariantCulture)
             {
                 HasHeaderRecord = true,
                 HeaderValidated = null,
                 MissingFieldFound = null
-            });
-
+            }))
+            {
             csv.Read();
             csv.ReadHeader();
             var hasCategory = csv.HeaderRecord?.Any(h => h.Equals("Category", StringComparison.OrdinalIgnoreCase)) ?? false;
@@ -284,6 +284,35 @@ namespace MoneyMap.Pages
                 var transaction = MapToTransaction(row, paymentResolution);
                 var key = new TransactionKey(transaction);

+                csvTransactions.Add((row, transaction, key));
+
+                // Track date range
+                if (minDate == null || transaction.Date < minDate) minDate = transaction.Date;
+                if (maxDate == null || transaction.Date > maxDate) maxDate = transaction.Date;
+            }
+            }
+
+            // Load existing transactions within the date range for fast duplicate checking
+            HashSet<TransactionKey> existingTransactions;
+            if (minDate.HasValue && maxDate.HasValue)
+            {
+                // Add a buffer of 1 day on each side to catch any edge cases
+                var startDate = minDate.Value.AddDays(-1);
+                var endDate = maxDate.Value.AddDays(1);
+
+                existingTransactions = await _db.Transactions
+                    .Where(t => t.Date >= startDate && t.Date <= endDate)
+                    .Select(t => new TransactionKey(t.Date, t.Amount, t.Name, t.Memo, t.AccountId, t.CardId))
+                    .ToHashSetAsync();
+            }
+            else
+            {
+                existingTransactions = new HashSet<TransactionKey>();
+            }
+
+            // Second pass: check for duplicates and build preview
+            foreach (var (row, transaction, key) in csvTransactions)
+            {
                 // Fast in-memory duplicate checking
                 bool isDuplicate = addedInThisBatch.Contains(key) || existingTransactions.Contains(key);

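One prerequisite the diff leaves implicit: `HashSet<TransactionKey>.Contains` is only a correct duplicate test if `TransactionKey` compares by value rather than by reference. Its definition is not part of this commit; a positional record with the same six fields would provide the required `Equals`/`GetHashCode` semantics, roughly as follows (the field types are guesses, and the whole shape is hypothetical).

    // Hypothetical shape of TransactionKey; its definition is not shown in this commit.
    // A positional record struct generates value-based Equals/GetHashCode automatically,
    // which is what the HashSet-based duplicate checks rely on.
    public readonly record struct TransactionKey(
        DateTime Date, decimal Amount, string Name,
        string Memo, int AccountId, int? CardId)
    {
        // Mirrors the `new TransactionKey(transaction)` call in the diff.
        public TransactionKey(Transaction t)
            : this(t.Date, t.Amount, t.Name, t.Memo, t.AccountId, t.CardId) { }
    }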