Files
MoneyMap/MoneyMap.Core/Services/ReceiptManager.cs
T
2026-04-20 18:18:20 -04:00

411 lines
16 KiB
C#

using Microsoft.EntityFrameworkCore;
using MoneyMap.Data;
using MoneyMap.Models;
using System.Security.Cryptography;
using System.Text;
namespace MoneyMap.Services
{
/// <summary>
/// Operations for storing, linking, and removing receipt files and their database records.
/// </summary>
public interface IReceiptManager
{
/// <summary>Uploads a receipt file and attaches it to an existing transaction.</summary>
/// <returns>A success result carrying the new receipt, or a failure result with an error message (for example when the transaction does not exist).</returns>
Task<ReceiptUploadResult> UploadReceiptAsync(long transactionId, IFormFile file);
/// <summary>Uploads a receipt file that is not yet attached to any transaction.</summary>
Task<ReceiptUploadResult> UploadUnmappedReceiptAsync(IFormFile file);
/// <summary>Uploads several unattached receipt files, reporting successes and failures per file.</summary>
Task<BulkUploadResult> UploadManyUnmappedReceiptsAsync(IReadOnlyList<IFormFile> files);
/// <summary>Deletes a receipt record and its stored file.</summary>
/// <returns><c>false</c> when no receipt with the given id exists; otherwise <c>true</c>.</returns>
Task<bool> DeleteReceiptAsync(long receiptId);
/// <summary>Attaches (or re-attaches) a receipt to a transaction.</summary>
/// <returns><c>false</c> when either the receipt or the transaction does not exist; otherwise <c>true</c>.</returns>
Task<bool> MapReceiptToTransactionAsync(long receiptId, long transactionId);
/// <summary>Detaches a receipt from its transaction.</summary>
/// <returns><c>false</c> when no receipt with the given id exists; otherwise <c>true</c>.</returns>
Task<bool> UnmapReceiptAsync(long receiptId);
/// <summary>Builds the on-disk path of a receipt from the configured storage folder and the receipt's stored path.</summary>
string GetReceiptPhysicalPath(Receipt receipt);
/// <summary>Loads a receipt together with its transaction, or <c>null</c> when it does not exist.</summary>
Task<Receipt?> GetReceiptAsync(long receiptId);
}
public class ReceiptManager : IReceiptManager
{
// Collaborators assigned once by the constructor.
private readonly MoneyMapContext _db;
private readonly IWebHostEnvironment _environment;
private readonly IConfiguration _configuration;
private readonly IServiceProvider _serviceProvider;
private readonly IReceiptParseQueue _parseQueue;
private readonly ILogger<ReceiptManager> _logger;

// Largest accepted upload, in bytes (10MB).
private const long MaxFileSize = 10L * 1024L * 1024L;

// Lower-case extensions (leading dot included) that an upload may carry.
private static readonly string[] AllowedExtensions = new[]
{
    ".jpg", ".jpeg", ".png", ".pdf", ".gif", ".heic"
};

// Leading "magic bytes" accepted for each extension; used to reject files whose
// content does not match their extension (extension spoofing).
private static readonly Dictionary<string, byte[][]> FileSignatures = new Dictionary<string, byte[][]>
{
    [".jpg"] = new byte[][]
    {
        new byte[] { 0xFF, 0xD8, 0xFF }
    },
    [".jpeg"] = new byte[][]
    {
        new byte[] { 0xFF, 0xD8, 0xFF }
    },
    [".png"] = new byte[][]
    {
        new byte[] { 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A }
    },
    [".gif"] = new byte[][]
    {
        new byte[] { 0x47, 0x49, 0x46, 0x38 } // "GIF8" - covers GIF87a and GIF89a
    },
    [".pdf"] = new byte[][]
    {
        new byte[] { 0x25, 0x50, 0x44, 0x46 } // "%PDF"
    },
    [".heic"] = new byte[][]
    {
        // 4-byte value 0x18 followed by "ftypheic"
        new byte[] { 0x00, 0x00, 0x00, 0x18, 0x66, 0x74, 0x79, 0x70, 0x68, 0x65, 0x69, 0x63 },
        // 4-byte value 0x1C followed by "ftypheic"
        new byte[] { 0x00, 0x00, 0x00, 0x1C, 0x66, 0x74, 0x79, 0x70, 0x68, 0x65, 0x69, 0x63 },
        // Relaxed check: only the three leading zero bytes.
        // NOTE(review): signatures are matched as prefixes, so this entry already
        // accepts everything the two longer entries accept - confirm this looseness is intended.
        new byte[] { 0x00, 0x00, 0x00 }
    }
};
/// <summary>
/// Creates the manager from its injected collaborators; each argument is stored as-is.
/// </summary>
public ReceiptManager(
    MoneyMapContext db,
    IWebHostEnvironment environment,
    IConfiguration configuration,
    IServiceProvider serviceProvider,
    IReceiptParseQueue parseQueue,
    ILogger<ReceiptManager> logger)
{
    // Grouped assignments; evaluation order matches the parameter order.
    (_db, _environment, _configuration) = (db, environment, configuration);
    (_serviceProvider, _parseQueue, _logger) = (serviceProvider, parseQueue, logger);
}
/// <summary>
/// Resolves the folder that holds receipt files: the "Receipts:StoragePath" setting
/// (falling back to "receipts") combined with the web root path.
/// </summary>
private string GetReceiptsBasePath()
{
    var configuredFolder = _configuration["Receipts:StoragePath"];
    if (configuredFolder is null)
    {
        // Setting absent: use the default folder name.
        configuredFolder = "receipts";
    }

    // NOTE(review): the folder lives under the web root - confirm receipts are not
    // unintentionally exposed through static file serving.
    return Path.Combine(_environment.WebRootPath, configuredFolder);
}
/// <summary>
/// Uploads <paramref name="file"/> as a receipt for the transaction identified by
/// <paramref name="transactionId"/>.
/// </summary>
/// <returns>
/// A failure result when the transaction cannot be found; otherwise the outcome of the upload.
/// </returns>
public async Task<ReceiptUploadResult> UploadReceiptAsync(long transactionId, IFormFile file)
{
    // The transaction must exist before anything is written.
    var owner = await _db.Transactions.FindAsync(transactionId);

    return owner is null
        ? ReceiptUploadResult.Failure("Transaction not found.")
        : await UploadReceiptInternalAsync(file, transactionId);
}
/// <summary>
/// Uploads <paramref name="file"/> as a receipt that is not attached to any transaction.
/// </summary>
public async Task<ReceiptUploadResult> UploadUnmappedReceiptAsync(IFormFile file)
    => await UploadReceiptInternalAsync(file, null);
/// <summary>
/// Validates an uploaded file, stores it on disk, creates its <c>Receipt</c> record and
/// enqueues the receipt for parsing.
/// </summary>
/// <param name="file">The uploaded file; a null or empty file yields a failure result.</param>
/// <param name="transactionId">Transaction to attach the receipt to, or <c>null</c> for an unmapped receipt.</param>
/// <returns>
/// A failure result when validation fails or the same content is already attached to the same
/// transaction; otherwise a success result with the new receipt and any duplicate warnings.
/// </returns>
/// <remarks>
/// Exceptions raised while writing the file or saving the record propagate to the caller;
/// the partially written file is removed first so no orphan is left on disk.
/// </remarks>
private async Task<ReceiptUploadResult> UploadReceiptInternalAsync(IFormFile file, long? transactionId)
{
    // Validate file
    if (file == null || file.Length == 0)
        return ReceiptUploadResult.Failure("No file selected.");
    if (file.Length > MaxFileSize)
        return ReceiptUploadResult.Failure($"File size exceeds {MaxFileSize / 1024 / 1024}MB limit.");

    var extension = Path.GetExtension(file.FileName).ToLowerInvariant();
    if (!AllowedExtensions.Contains(extension))
        return ReceiptUploadResult.Failure($"File type {extension} not allowed. Use: {string.Join(", ", AllowedExtensions)}");

    // Validate file content matches extension (magic bytes check)
    if (!await ValidateFileSignatureAsync(file, extension))
        return ReceiptUploadResult.Failure($"File content does not match {extension} format. The file may be corrupted or have an incorrect extension.");

    // CreateDirectory is a no-op when the directory already exists.
    var receiptsBasePath = GetReceiptsBasePath();
    Directory.CreateDirectory(receiptsBasePath);

    // Calculate SHA256 hash (stored as lower-case hex)
    string fileHash;
    using (var sha256 = SHA256.Create())
    {
        using var stream = file.OpenReadStream();
        var hashBytes = await sha256.ComputeHashAsync(stream);
        fileHash = Convert.ToHexString(hashBytes).ToLowerInvariant();
    }

    // Check for exact duplicate (same transaction + same hash)
    if (transactionId.HasValue)
    {
        var existingReceipt = await _db.Receipts
            .FirstOrDefaultAsync(r => r.TransactionId == transactionId && r.FileHashSha256 == fileHash);
        if (existingReceipt != null)
            return ReceiptUploadResult.Failure("This receipt has already been uploaded for this transaction.");
    }

    // Receipts are stored under their sanitized name, so the name+size duplicate check
    // must compare against the sanitized name as well; comparing the raw upload name
    // would miss earlier uploads whose names were altered by sanitization.
    var displayName = SanitizeFileName(file.FileName);
    var duplicateWarnings = await CheckForDuplicatesAsync(fileHash, displayName, file.Length);

    // Generate unique filename
    var storedFileName = $"{transactionId?.ToString() ?? "unmapped"}_{Guid.NewGuid()}{extension}";
    var filePath = Path.Combine(receiptsBasePath, storedFileName);

    // Store just the filename in database (base path comes from config)
    var receipt = new Receipt
    {
        TransactionId = transactionId,
        FileName = displayName,
        StoragePath = storedFileName,
        FileSizeBytes = file.Length,
        ContentType = file.ContentType,
        FileHashSha256 = fileHash,
        UploadedAtUtc = DateTime.UtcNow,
        ParseStatus = ReceiptParseStatus.Queued
    };

    try
    {
        // Save file
        await using (var fileStream = new FileStream(filePath, FileMode.Create))
        {
            await file.CopyToAsync(fileStream);
        }

        // Create receipt record
        _db.Receipts.Add(receipt);
        await _db.SaveChangesAsync();
    }
    catch
    {
        // Without a saved record nothing references the file, so remove it before rethrowing.
        TryDeleteOrphanedFile(filePath);
        throw;
    }

    await _parseQueue.EnqueueAsync(receipt.Id);
    _logger.LogInformation("Receipt {ReceiptId} enqueued for parsing", receipt.Id);
    return ReceiptUploadResult.Success(receipt, duplicateWarnings);
}

/// <summary>Best-effort removal of a file left behind by a failed upload; failures are logged, not thrown.</summary>
private void TryDeleteOrphanedFile(string filePath)
{
    try
    {
        if (File.Exists(filePath))
            File.Delete(filePath);
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        _logger.LogWarning(ex, "Could not remove orphaned receipt file {FilePath}", filePath);
    }
}
/// <summary>
/// Uploads each file as an unmapped receipt, one after another, and reports the
/// per-file outcome.
/// </summary>
/// <param name="files">Files to upload. A null entry is reported as a failed item.</param>
/// <returns>The uploaded items (with duplicate warnings) and the failed items (with error messages).</returns>
/// <exception cref="ArgumentNullException"><paramref name="files"/> is null.</exception>
public async Task<BulkUploadResult> UploadManyUnmappedReceiptsAsync(IReadOnlyList<IFormFile> files)
{
    if (files is null)
        throw new ArgumentNullException(nameof(files));

    var uploaded = new List<BulkUploadItem>();
    var failed = new List<BulkUploadFailure>();

    foreach (var file in files)
    {
        var result = await UploadReceiptInternalAsync(file, null);
        if (result.IsSuccess)
        {
            uploaded.Add(new BulkUploadItem
            {
                ReceiptId = result.Receipt!.Id,
                FileName = result.Receipt.FileName,
                DuplicateWarnings = result.DuplicateWarnings
            });
        }
        else
        {
            failed.Add(new BulkUploadFailure
            {
                // A null entry is rejected by the upload ("No file selected."), so it
                // has no name to report; dereferencing it here would throw.
                FileName = file?.FileName ?? "",
                ErrorMessage = result.ErrorMessage ?? "Unknown error"
            });
        }
    }

    return new BulkUploadResult
    {
        Uploaded = uploaded,
        Failed = failed
    };
}
/// <summary>
/// Looks for already stored receipts that resemble the file being uploaded.
/// Receipts with the same hash are reported first; only when there are none are
/// receipts with the same file name and size reported instead.
/// </summary>
/// <returns>One warning per matching receipt; empty when nothing matches.</returns>
private async Task<List<DuplicateWarning>> CheckForDuplicatesAsync(string fileHash, string fileName, long fileSize)
{
    // Shared projection from a stored receipt to a warning entry.
    static DuplicateWarning Describe(Receipt stored, string reason)
    {
        return new DuplicateWarning
        {
            ReceiptId = stored.Id,
            FileName = stored.FileName,
            UploadedAtUtc = stored.UploadedAtUtc,
            TransactionId = stored.TransactionId,
            TransactionName = stored.Transaction?.Name,
            Reason = reason
        };
    }

    var found = new List<DuplicateWarning>();

    // Pass 1: identical content.
    var sameContent = await _db.Receipts
        .Include(r => r.Transaction)
        .Where(r => r.FileHashSha256 == fileHash)
        .ToListAsync();
    found.AddRange(sameContent.Select(r => Describe(r, "Identical file content (same hash)")));

    if (found.Count > 0)
    {
        return found;
    }

    // Pass 2: same name and size but different content (might be a resaved/edited copy).
    var sameNameAndSize = await _db.Receipts
        .Include(r => r.Transaction)
        .Where(r => r.FileName == fileName && r.FileSizeBytes == fileSize)
        .ToListAsync();
    found.AddRange(sameNameAndSize.Select(r => Describe(r, "Same file name and size")));

    return found;
}
/// <summary>
/// Points the receipt at the given transaction; remapping from another transaction is allowed.
/// </summary>
/// <returns>
/// <c>false</c> when the receipt or the transaction does not exist; <c>true</c> otherwise,
/// including when the receipt was already mapped to that transaction.
/// </returns>
public async Task<bool> MapReceiptToTransactionAsync(long receiptId, long transactionId)
{
    if (await _db.Receipts.FindAsync(receiptId) is not { } target)
    {
        return false;
    }

    if (await _db.Transactions.FindAsync(transactionId) is null)
    {
        return false;
    }

    // Only write when the mapping actually changes.
    if (target.TransactionId != transactionId)
    {
        target.TransactionId = transactionId;
        await _db.SaveChangesAsync();
    }

    return true;
}
/// <summary>
/// Detaches the receipt from its transaction by clearing its transaction id.
/// </summary>
/// <returns><c>false</c> when the receipt does not exist; otherwise <c>true</c>.</returns>
public async Task<bool> UnmapReceiptAsync(long receiptId)
{
    var target = await _db.Receipts.FindAsync(receiptId);
    if (target is not null)
    {
        target.TransactionId = null;
        await _db.SaveChangesAsync();
        return true;
    }

    return false;
}
/// <summary>
/// Checks that the file's leading bytes match one of the signatures registered for
/// <paramref name="extension"/> in <c>FileSignatures</c>.
/// </summary>
/// <param name="file">The uploaded file whose header is inspected.</param>
/// <param name="extension">Lower-case extension including the dot, used as the lookup key.</param>
/// <returns>
/// <c>true</c> when a registered signature is a prefix of the file, or when no signatures
/// are registered for the extension; otherwise <c>false</c>.
/// </returns>
private static async Task<bool> ValidateFileSignatureAsync(IFormFile file, string extension)
{
    if (!FileSignatures.TryGetValue(extension, out var signatures))
        return true; // No signature check for unknown extensions

    var maxSignatureLength = signatures.Max(s => s.Length);

    // Take the minimum in 64-bit first so a very large file length cannot overflow the cast.
    var headerBytes = new byte[(int)Math.Min(maxSignatureLength, file.Length)];

    await using var stream = file.OpenReadStream();

    // A single ReadAsync may return fewer bytes than requested; keep reading until
    // the header buffer is full or the stream ends.
    var filled = 0;
    while (filled < headerBytes.Length)
    {
        var read = await stream.ReadAsync(headerBytes.AsMemory(filled, headerBytes.Length - filled));
        if (read == 0)
            break;
        filled += read;
    }

    // Check if file starts with any of the valid signatures for this extension
    foreach (var signature in signatures)
    {
        if (HeaderStartsWith(headerBytes, filled, signature))
            return true;
    }

    return false;
}

/// <summary>True when the first <paramref name="validLength"/> bytes of <paramref name="header"/> begin with <paramref name="signature"/>.</summary>
private static bool HeaderStartsWith(byte[] header, int validLength, byte[] signature)
{
    // Only bytes actually read from the stream take part in the comparison.
    return validLength >= signature.Length
        && header.AsSpan(0, signature.Length).SequenceEqual(signature);
}
/// <summary>
/// Produces an ASCII-only display name from an uploaded file name.
/// </summary>
/// <param name="fileName">The name supplied with the upload.</param>
/// <returns>
/// The name with trademark/registered/copyright symbols removed, every other character
/// outside the printable ASCII range (32-126) replaced by an underscore, and surrounding
/// whitespace trimmed; "receipt" when the input or the result is null, empty or whitespace.
/// </returns>
private static string SanitizeFileName(string fileName)
{
    if (string.IsNullOrWhiteSpace(fileName))
        return "receipt";

    var sanitized = new StringBuilder(fileName.Length);
    foreach (var c in fileName)
    {
        // Trademark, registered and copyright signs are dropped rather than replaced.
        // Written as Unicode escapes so the literals survive re-encoding of the source file.
        // NOTE(review): the symbol set is reconstructed from the accompanying comment - confirm.
        if (c is '\u2122' or '\u00AE' or '\u00A9')
        {
            continue;
        }

        // Keep printable ASCII; anything else becomes an underscore.
        sanitized.Append(c is >= ' ' and <= '~' ? c : '_');
    }

    var result = sanitized.ToString().Trim();
    return string.IsNullOrWhiteSpace(result) ? "receipt" : result;
}
/// <summary>
/// Deletes the receipt's stored file (best effort) and then its database record.
/// </summary>
/// <param name="receiptId">Id of the receipt to delete.</param>
/// <returns><c>false</c> when the receipt does not exist; otherwise <c>true</c>.</returns>
public async Task<bool> DeleteReceiptAsync(long receiptId)
{
    var receipt = await _db.Receipts.FindAsync(receiptId);
    if (receipt == null)
        return false;

    // Delete physical file
    var filePath = GetReceiptPhysicalPath(receipt);
    if (File.Exists(filePath))
    {
        try
        {
            File.Delete(filePath);
        }
        catch (Exception ex)
        {
            // Still best effort: the record is removed even if the file cannot be,
            // but the failure is logged so the leftover file can be traced.
            _logger.LogWarning(ex, "Failed to delete file {FilePath} for receipt {ReceiptId}", filePath, receipt.Id);
        }
    }

    // Delete database record (cascade will handle ParseLogs and LineItems)
    _db.Receipts.Remove(receipt);
    await _db.SaveChangesAsync();
    return true;
}
/// <summary>
/// Returns the on-disk location of a receipt: the configured base folder combined with
/// the receipt's stored path (which holds just the file name).
/// </summary>
public string GetReceiptPhysicalPath(Receipt receipt)
{
    var baseFolder = GetReceiptsBasePath();
    return Path.Combine(baseFolder, receipt.StoragePath);
}
/// <summary>
/// Loads the receipt with the given id, including its transaction.
/// </summary>
/// <returns>The receipt, or <c>null</c> when none has that id.</returns>
public async Task<Receipt?> GetReceiptAsync(long receiptId)
{
    var receiptsWithTransaction = _db.Receipts.Include(r => r.Transaction);
    return await receiptsWithTransaction.FirstOrDefaultAsync(r => r.Id == receiptId);
}
}
/// <summary>
/// Outcome of a single receipt upload: either the stored receipt (plus any duplicate
/// warnings) or an error message.
/// </summary>
public class ReceiptUploadResult
{
    public bool IsSuccess { get; init; }
    public Receipt? Receipt { get; init; }
    public string? ErrorMessage { get; init; }
    public List<DuplicateWarning> DuplicateWarnings { get; init; } = new List<DuplicateWarning>();

    /// <summary>Builds a successful result; a null <paramref name="warnings"/> becomes an empty list.</summary>
    public static ReceiptUploadResult Success(Receipt receipt, List<DuplicateWarning>? warnings = null)
    {
        return new ReceiptUploadResult
        {
            IsSuccess = true,
            Receipt = receipt,
            DuplicateWarnings = warnings ?? new List<DuplicateWarning>()
        };
    }

    /// <summary>Builds a failed result carrying <paramref name="error"/>.</summary>
    public static ReceiptUploadResult Failure(string error)
    {
        return new ReceiptUploadResult
        {
            IsSuccess = false,
            ErrorMessage = error
        };
    }
}
/// <summary>
/// Describes an already stored receipt that resembles a newly uploaded file.
/// </summary>
public class DuplicateWarning
{
    // Identity of the stored receipt that matched.
    public long ReceiptId { get; set; }
    public string FileName { get; set; } = string.Empty;
    public DateTime UploadedAtUtc { get; set; }

    // Transaction the stored receipt is attached to, if any.
    public long? TransactionId { get; set; }
    public string? TransactionName { get; set; }

    // Human-readable explanation of why the receipt was flagged.
    public string Reason { get; set; } = string.Empty;
}
/// <summary>
/// Outcome of a bulk upload: the files that were stored and the files that were rejected.
/// </summary>
public class BulkUploadResult
{
    public List<BulkUploadItem> Uploaded { get; init; } = new List<BulkUploadItem>();
    public List<BulkUploadFailure> Failed { get; init; } = new List<BulkUploadFailure>();

    /// <summary>Number of files processed: uploaded plus failed.</summary>
    public int TotalCount
    {
        get { return Uploaded.Count + Failed.Count; }
    }
}
/// <summary>
/// One successfully uploaded file within a bulk upload.
/// </summary>
public class BulkUploadItem
{
    public long ReceiptId { get; set; }
    public string FileName { get; set; } = string.Empty;

    // Existing receipts that resemble this upload; empty when there are none.
    public List<DuplicateWarning> DuplicateWarnings { get; set; } = new List<DuplicateWarning>();
}
/// <summary>
/// One rejected file within a bulk upload, with the reason it was rejected.
/// </summary>
public class BulkUploadFailure
{
    public string FileName { get; set; } = string.Empty;
    public string ErrorMessage { get; set; } = string.Empty;
}
}