Compare commits

...

6 Commits

Author SHA1 Message Date
00a0b3e14f Add --reimport and --reprocess-reactions CLI modes
Add flags to reimport all data from archived files or selectively
reprocess reactions. Also replace EnsureCreatedAsync with explicit
database/table existence checks for safer initialization.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-05 23:20:25 -05:00
a281f7f1e7 Use export date from filename for archive subdirectory
Extract the date from filenames like "2026-01-20.json" instead of
using the current date, so archives are organized by export date
rather than processing date.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-05 23:20:17 -05:00
6f63f36df0 Improve import resilience with per-message saves and duplicate handling
Save after each message to isolate failures, catch and skip duplicate
key violations (SQL error 2601), and clear change tracker on rollback
to prevent cascading failures.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-05 23:20:08 -05:00
fbe52f72d6 Refactor reactions to track per-user instead of aggregate count
Replace Reaction.Count with UserId foreign key to User entity,
add unique index on (MessageId, EmojiName, UserId) to prevent
duplicates, and configure the User-Reactions relationship.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-05 23:19:59 -05:00
7896f9ef9a Fix duplicate reaction insertion error
- Add Unicode normalization for emoji names to handle encoding differences
- Check EF's local change tracker for pending reactions before insert
- Prevents DbUpdateException on unique index IX_Reactions_MessageId_EmojiName_UserId

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-29 13:37:53 -05:00
2da546fbd5 Add Gitea registry support and fix cross-device archiving
- Update docker-compose.yml to reference Gitea registry image
- Add docker-publish.cmd for building and pushing to Gitea
- Fix ArchiveService to handle cross-device moves by falling back
  to copy+delete when input and archive are on different volumes

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-21 18:19:03 -05:00
8 changed files with 444 additions and 53 deletions

View File

@@ -1,5 +1,6 @@
services:
discord-archive:
image: git.thecozycat.net/aj/discordarchivemanager:latest
build: .
volumes:
- ./input:/app/input

2
docker-publish.cmd Normal file
View File

@@ -0,0 +1,2 @@
rem Build the application image and push it to the Gitea container registry.
rem Run from the repository root (the trailing "." is the build context).
docker build -t git.thecozycat.net/aj/discordarchivemanager:latest .
docker push git.thecozycat.net/aj/discordarchivemanager:latest

View File

@@ -111,7 +111,13 @@ public class DiscordArchiveContext : DbContext
.WithMany(m => m.Reactions)
.HasForeignKey(e => e.MessageId)
.OnDelete(DeleteBehavior.Cascade);
entity.HasOne(e => e.User)
.WithMany(u => u.Reactions)
.HasForeignKey(e => e.UserId)
.OnDelete(DeleteBehavior.Restrict);
entity.HasIndex(e => e.MessageId);
entity.HasIndex(e => e.UserId);
entity.HasIndex(e => new { e.MessageId, e.EmojiName, e.UserId }).IsUnique();
});
// Mention configuration

View File

@@ -19,9 +19,14 @@ public class Reaction
[MaxLength(256)]
public string EmojiName { get; set; } = null!;
public int Count { get; set; }
[Required]
[MaxLength(32)]
public string UserId { get; set; } = null!;
// Navigation properties
[ForeignKey(nameof(MessageId))]
public Message Message { get; set; } = null!;
[ForeignKey(nameof(UserId))]
public User User { get; set; } = null!;
}

View File

@@ -22,4 +22,5 @@ public class User
public ICollection<UserSnapshot> Snapshots { get; set; } = new List<UserSnapshot>();
public ICollection<Message> Messages { get; set; } = new List<Message>();
public ICollection<Mention> Mentions { get; set; } = new List<Mention>();
public ICollection<Reaction> Reactions { get; set; } = new List<Reaction>();
}

View File

@@ -1,6 +1,8 @@
using DiscordArchiveManager.Data;
using DiscordArchiveManager.Services;
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Infrastructure;
using Microsoft.EntityFrameworkCore.Storage;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
@@ -32,11 +34,23 @@ var inputDirectory = config["Paths:InputDirectory"] ?? "/app/input";
var archiveDirectory = config["Paths:ArchiveDirectory"] ?? "/app/archive";
var imageDirectory = config["Paths:ImageDirectory"] ?? "/app/images";
var reprocessReactions = args.Contains("--reprocess-reactions");
var reimport = args.Contains("--reimport");
logger.LogInformation("Discord Archive Manager starting...");
logger.LogInformation("Input directory: {Path}", inputDirectory);
logger.LogInformation("Archive directory: {Path}", archiveDirectory);
logger.LogInformation("Image directory: {Path}", imageDirectory);
if (reimport)
{
logger.LogInformation("Mode: Reimport all data from archive");
}
else if (reprocessReactions)
{
logger.LogInformation("Mode: Reprocess reactions from archive");
}
// Ensure directories exist
Directory.CreateDirectory(inputDirectory);
Directory.CreateDirectory(archiveDirectory);
@@ -46,54 +60,84 @@ Directory.CreateDirectory(imageDirectory);
using (var scope = host.Services.CreateScope())
{
var context = scope.ServiceProvider.GetRequiredService<DiscordArchiveContext>();
logger.LogInformation("Ensuring database exists and applying migrations...");
await context.Database.EnsureCreatedAsync();
logger.LogInformation("Ensuring database schema exists...");
// Get the database creator for more control
var creator = context.GetService<IRelationalDatabaseCreator>()!;
if (!await creator.ExistsAsync())
{
logger.LogError("Database does not exist. Please create it first.");
return;
}
if (!await creator.HasTablesAsync())
{
logger.LogInformation("Creating database tables...");
await creator.CreateTablesAsync();
}
logger.LogInformation("Database ready.");
}
// Process files
// Process files or reprocess reactions
using (var scope = host.Services.CreateScope())
{
var importService = scope.ServiceProvider.GetRequiredService<JsonImportService>();
var archiveService = scope.ServiceProvider.GetRequiredService<ArchiveService>();
var files = archiveService.GetExportFiles(inputDirectory).ToList();
if (files.Count == 0)
if (reimport)
{
logger.LogInformation("No JSON files found in input directory.");
logger.LogInformation("Reimporting all data from archived files...");
var (success, errors) = await importService.ReimportFromArchiveAsync(archiveDirectory, imageDirectory);
logger.LogInformation("Reimport complete. Imported: {Success}, Errors: {Errors}", success, errors);
}
else if (reprocessReactions)
{
logger.LogInformation("Reprocessing reactions from archived files...");
var added = await importService.ReprocessReactionsAsync(archiveDirectory);
logger.LogInformation("Reprocessing complete. Added {Count} reactions.", added);
}
else
{
logger.LogInformation("Found {Count} JSON files to process.", files.Count);
var files = archiveService.GetExportFiles(inputDirectory).ToList();
var successCount = 0;
var skipCount = 0;
var errorCount = 0;
foreach (var file in files)
if (files.Count == 0)
{
try
{
var processed = await importService.ProcessFileAsync(file, imageDirectory, archiveDirectory);
if (processed)
{
successCount++;
}
else
{
skipCount++;
}
}
catch (Exception ex)
{
logger.LogError(ex, "Failed to process file: {Path}", file);
errorCount++;
}
logger.LogInformation("No JSON files found in input directory.");
}
else
{
logger.LogInformation("Found {Count} JSON files to process.", files.Count);
logger.LogInformation("Processing complete. Processed: {Success}, Skipped: {Skip}, Errors: {Error}",
successCount, skipCount, errorCount);
var successCount = 0;
var skipCount = 0;
var errorCount = 0;
foreach (var file in files)
{
try
{
var processed = await importService.ProcessFileAsync(file, imageDirectory, archiveDirectory);
if (processed)
{
successCount++;
}
else
{
skipCount++;
}
}
catch (Exception ex)
{
logger.LogError(ex, "Failed to process file: {Path}", file);
errorCount++;
}
}
logger.LogInformation("Processing complete. Processed: {Success}, Skipped: {Skip}, Errors: {Error}",
successCount, skipCount, errorCount);
}
}
}

View File

@@ -28,15 +28,15 @@ public class ArchiveService
var jsonDirectory = Path.GetDirectoryName(jsonFilePath)!;
var filesDirectory = GetFilesDirectoryPath(jsonFilePath);
// Create archive subdirectory based on date
var archiveSubdir = DateTime.Now.ToString("yyyy-MM-dd");
// Create archive subdirectory based on export date from filename
var archiveSubdir = GetExportDateFromFilename(jsonFileName) ?? DateTime.Now.ToString("yyyy-MM-dd");
var archivePath = Path.Combine(archiveRoot, archiveSubdir);
Directory.CreateDirectory(archivePath);
// Archive the JSON file
var archivedJsonPath = Path.Combine(archivePath, jsonFileName);
var uniqueJsonPath = GetUniquePath(archivedJsonPath);
File.Move(jsonFilePath, uniqueJsonPath);
MoveFile(jsonFilePath, uniqueJsonPath);
_logger.LogInformation("Archived JSON file to {Path}", uniqueJsonPath);
// Archive the _Files directory if it exists
@@ -45,7 +45,7 @@ public class ArchiveService
var filesDirectoryName = Path.GetFileName(filesDirectory);
var archivedFilesPath = Path.Combine(archivePath, filesDirectoryName);
var uniqueFilesPath = GetUniquePath(archivedFilesPath);
Directory.Move(filesDirectory, uniqueFilesPath);
MoveDirectory(filesDirectory, uniqueFilesPath);
_logger.LogInformation("Archived files directory to {Path}", uniqueFilesPath);
}
@@ -152,6 +152,78 @@ public class ArchiveService
}
}
/// <summary>
/// Extracts the export date from a filename like "2026-01-20.json", including
/// suffixed variants like "2026-01-20_1.json".
/// </summary>
/// <param name="filename">File name (a full path is also accepted).</param>
/// <returns>The "yyyy-MM-dd" portion, or null if the name does not start with one.</returns>
private static string? GetExportDateFromFilename(string filename)
{
    var nameWithoutExtension = Path.GetFileNameWithoutExtension(filename);
    // Split never yields an empty array, so the first segment covers both the
    // plain "yyyy-MM-dd" case and the "yyyy-MM-dd_<n>" suffix case in one check.
    var candidate = nameWithoutExtension.Split('_')[0];
    // Use the invariant culture: a null provider means CurrentCulture, and
    // cultures with non-Gregorian default calendars would mis-parse the
    // fixed yyyy-MM-dd pattern (CA1305).
    return DateTime.TryParseExact(candidate, "yyyy-MM-dd",
        System.Globalization.CultureInfo.InvariantCulture,
        System.Globalization.DateTimeStyles.None, out _)
        ? candidate
        : null;
}
/// <summary>
/// Moves a file, falling back to copy-then-delete when the direct move
/// fails with an IOException (e.g. source and destination on different volumes).
/// </summary>
private static void MoveFile(string source, string destination)
{
    try
    {
        File.Move(source, destination);
        return;
    }
    catch (IOException)
    {
        // File.Move cannot cross volumes; fall through to copy+delete.
    }
    File.Copy(source, destination);
    File.Delete(source);
}
/// <summary>
/// Moves a directory, falling back to a recursive copy followed by a
/// delete when the direct move fails with an IOException (cross-volume move).
/// </summary>
private static void MoveDirectory(string source, string destination)
{
    try
    {
        Directory.Move(source, destination);
        return;
    }
    catch (IOException)
    {
        // Directory.Move cannot cross volumes; fall through to copy+delete.
    }
    CopyDirectory(source, destination);
    Directory.Delete(source, true);
}
/// <summary>
/// Recursively copies a directory tree, creating the destination as needed.
/// </summary>
private static void CopyDirectory(string source, string destination)
{
    Directory.CreateDirectory(destination);
    foreach (var entry in Directory.EnumerateFiles(source))
    {
        File.Copy(entry, Path.Combine(destination, Path.GetFileName(entry)));
    }
    foreach (var entry in Directory.EnumerateDirectories(source))
    {
        // Recurse into each subdirectory, mirroring its name under destination.
        CopyDirectory(entry, Path.Combine(destination, Path.GetFileName(entry)));
    }
}
/// <summary>
/// Gets a unique file/directory path by appending a number if the path already exists.
/// </summary>

View File

@@ -72,20 +72,32 @@ public class JsonImportService
// Upsert Channel
await UpsertChannelAsync(export.Channel, export.Guild.Id);
// Process messages
// Process messages - save after each to isolate any issues
var processedCount = 0;
foreach (var message in export.Messages)
{
if (await ProcessMessageAsync(message, export.Channel.Id, jsonFilePath, imageRoot))
try
{
processedCount++;
if (await ProcessMessageAsync(message, export.Channel.Id, jsonFilePath, imageRoot))
{
await _context.SaveChangesAsync();
processedCount++;
}
}
catch (DbUpdateException ex) when (ex.InnerException is Microsoft.Data.SqlClient.SqlException sqlEx && sqlEx.Number == 2601)
{
// Duplicate key - log and continue
_logger.LogWarning("Duplicate key error for message {MessageId}, skipping: {Error}",
message.Id, sqlEx.Message);
_context.ChangeTracker.Clear();
// Re-upsert guild and channel as they were cleared
await UpsertGuildAsync(export.Guild);
await UpsertChannelAsync(export.Channel, export.Guild.Id);
}
}
_logger.LogInformation("Processed {Count} new messages", processedCount);
await _context.SaveChangesAsync();
// Archive the file
var archivePath = _archiveService.ArchiveExport(jsonFilePath, archiveRoot);
@@ -106,6 +118,9 @@ public class JsonImportService
catch (Exception ex)
{
await transaction.RollbackAsync();
_context.ChangeTracker.Clear(); // Clear tracked entities to prevent cascading failures
_processedMessages.Clear(); // Clear the session tracking
_addedReactions.Clear();
_logger.LogError(ex, "Error processing file, rolled back transaction: {Path}", jsonFilePath);
throw;
}
@@ -171,12 +186,21 @@ public class JsonImportService
private async Task<bool> ProcessMessageAsync(MessageInfo messageInfo, string channelId, string jsonFilePath, string imageRoot)
{
// Skip if message already exists
if (await _context.Messages.AnyAsync(m => m.Id == messageInfo.Id))
// Skip if message already processed in this session
if (_processedMessages.Contains(messageInfo.Id))
{
return false;
}
// Skip if message already exists in database
if (await _context.Messages.AnyAsync(m => m.Id == messageInfo.Id))
{
_processedMessages.Add(messageInfo.Id);
return false;
}
_processedMessages.Add(messageInfo.Id);
// Upsert author
await UpsertUserAsync(messageInfo.Author);
@@ -210,7 +234,7 @@ public class JsonImportService
// Process reactions
foreach (var reaction in messageInfo.Reactions)
{
ProcessReaction(reaction, messageInfo.Id);
await ProcessReactionAsync(reaction, messageInfo.Id);
}
// Process mentions
@@ -348,16 +372,252 @@ public class JsonImportService
_context.Embeds.Add(embed);
}
private void ProcessReaction(ReactionInfo reactionInfo, string messageId)
// Track added items in current session to avoid duplicates
private readonly HashSet<string> _processedMessages = new();
private readonly HashSet<(string MessageId, string EmojiName, string UserId)> _addedReactions = new();
private async Task ProcessReactionAsync(ReactionInfo reactionInfo, string messageId)
{
var reaction = new Reaction
// Create one Reaction record per user who reacted
if (reactionInfo.Users != null && reactionInfo.Users.Count > 0)
{
MessageId = messageId,
EmojiCode = reactionInfo.Emoji.Code,
EmojiName = reactionInfo.Emoji.Name,
Count = reactionInfo.Count
};
_context.Reactions.Add(reaction);
foreach (var userInfo in reactionInfo.Users)
{
// Normalize emoji name to handle potential Unicode differences
var normalizedEmojiName = reactionInfo.Emoji.Name.Normalize();
var key = (messageId, normalizedEmojiName, userInfo.Id);
// Check if already added in this session
if (_addedReactions.Contains(key)) continue;
// Check if exists in database
var existsInDb = await _context.Reactions
.AnyAsync(r => r.MessageId == messageId && r.EmojiName == normalizedEmojiName && r.UserId == userInfo.Id);
if (existsInDb)
{
_addedReactions.Add(key);
continue;
}
// Also check the local change tracker for entities not yet saved
var existsLocal = _context.ChangeTracker.Entries<Reaction>()
.Any(e => e.Entity.MessageId == messageId &&
e.Entity.EmojiName == normalizedEmojiName &&
e.Entity.UserId == userInfo.Id);
if (existsLocal)
{
_addedReactions.Add(key);
continue;
}
// Ensure the user exists
var user = await _context.Users.FindAsync(userInfo.Id);
if (user == null)
{
user = new User
{
Id = userInfo.Id,
Name = userInfo.Name,
Discriminator = userInfo.Discriminator,
IsBot = userInfo.IsBot
};
_context.Users.Add(user);
}
var reaction = new Reaction
{
MessageId = messageId,
EmojiCode = reactionInfo.Emoji.Code,
EmojiName = normalizedEmojiName,
UserId = userInfo.Id
};
_context.Reactions.Add(reaction);
_addedReactions.Add(key);
}
}
}
/// <summary>
/// Reimports all data from archived JSON files (for rebuilding database).
/// Scans every *.json under the archive root recursively, preloading existing
/// message IDs and reaction keys so already-imported data is skipped instead
/// of re-inserted. Each file is replayed inside its own transaction.
/// </summary>
/// <param name="archiveDirectory">Root of the archive tree to scan recursively.</param>
/// <param name="imageDirectory">Root directory passed to message processing for image storage.</param>
/// <returns>Tuple of (files imported successfully, files that failed).</returns>
public async Task<(int Success, int Errors)> ReimportFromArchiveAsync(string archiveDirectory, string imageDirectory)
{
var successCount = 0;
var errorCount = 0;
// Sorted by full path so files replay in a stable, repeatable order
// (archive subdirectories are date-named, so this is roughly chronological).
var jsonFiles = Directory.EnumerateFiles(archiveDirectory, "*.json", SearchOption.AllDirectories)
.OrderBy(f => f) // Process in order
.ToList();
_logger.LogInformation("Found {Count} archived JSON files to reimport", jsonFiles.Count);
// Preload existing data to avoid checking database repeatedly
_processedMessages.Clear();
foreach (var id in await _context.Messages.Select(m => m.Id).ToListAsync())
{
_processedMessages.Add(id);
}
_logger.LogInformation("Found {Count} existing messages in database", _processedMessages.Count);
// NOTE(review): keys loaded here are the raw DB values; lookups elsewhere use
// Normalize()d emoji names — confirm stored names are already normalized.
_addedReactions.Clear();
foreach (var r in await _context.Reactions.Select(r => new { r.MessageId, r.EmojiName, r.UserId }).ToListAsync())
{
_addedReactions.Add((r.MessageId, r.EmojiName, r.UserId));
}
_logger.LogInformation("Found {Count} existing reactions in database", _addedReactions.Count);
foreach (var jsonFilePath in jsonFiles)
{
try
{
var json = await File.ReadAllTextAsync(jsonFilePath);
var export = JsonSerializer.Deserialize<DiscordExport>(json, JsonOptions);
if (export == null)
{
// Deserialization returned null (e.g. literal "null" JSON) — count and move on.
_logger.LogWarning("Failed to deserialize: {Path}", jsonFilePath);
errorCount++;
continue;
}
// Process in a transaction
await using var transaction = await _context.Database.BeginTransactionAsync();
try
{
// Upsert Guild
await UpsertGuildAsync(export.Guild);
// Upsert Channel
await UpsertChannelAsync(export.Channel, export.Guild.Id);
// Process messages
var processedCount = 0;
foreach (var message in export.Messages)
{
if (await ProcessMessageAsync(message, export.Channel.Id, jsonFilePath, imageDirectory))
{
processedCount++;
}
}
// Single save per file (unlike the per-message saves in the live import path).
await _context.SaveChangesAsync();
await transaction.CommitAsync();
if (processedCount > 0)
{
_logger.LogDebug("Imported {Count} messages from {Path}", processedCount, jsonFilePath);
}
successCount++;
}
catch (Exception ex)
{
// One bad file should not abort the whole reimport: roll back and continue.
// NOTE(review): unlike ProcessFileAsync, the change tracker is NOT cleared
// here — stale tracked entities could affect subsequent files; verify.
await transaction.RollbackAsync();
_logger.LogWarning(ex, "Failed to reimport: {Path}", jsonFilePath);
errorCount++;
}
}
catch (Exception ex)
{
// I/O or JSON parse failure before the transaction started.
_logger.LogWarning(ex, "Failed to read file: {Path}", jsonFilePath);
errorCount++;
}
}
return (successCount, errorCount);
}
/// <summary>
/// Reprocesses archived JSON files to add missing reactions.
/// Scans every *.json under the archive root, creating one Reaction row per
/// (message, emoji, user) triple found in the export that is not already in
/// the database. Saves are per-file; a failing file is logged and skipped.
/// </summary>
/// <param name="archiveDirectory">Root of the archive tree to scan recursively.</param>
/// <returns>Total number of Reaction rows added across all files.</returns>
public async Task<int> ReprocessReactionsAsync(string archiveDirectory)
{
var totalAdded = 0;
var jsonFiles = Directory.EnumerateFiles(archiveDirectory, "*.json", SearchOption.AllDirectories).ToList();
_logger.LogInformation("Found {Count} archived JSON files to reprocess for reactions", jsonFiles.Count);
// Track existing reactions to avoid duplicates (MessageId, EmojiName, UserId)
// NOTE(review): keys are loaded as stored, but lookups below use Normalize()d
// emoji names — if the DB holds unnormalized names, a duplicate insert could
// still slip through to the unique index; verify stored values are normalized.
var existingReactions = new HashSet<(string MessageId, string EmojiName, string UserId)>();
var existing = await _context.Reactions
.Select(r => new { r.MessageId, r.EmojiName, r.UserId })
.ToListAsync();
foreach (var r in existing)
{
existingReactions.Add((r.MessageId, r.EmojiName, r.UserId));
}
_logger.LogInformation("Found {Count} existing reactions in database", existing.Count);
foreach (var jsonFilePath in jsonFiles)
{
try
{
var json = await File.ReadAllTextAsync(jsonFilePath);
var export = JsonSerializer.Deserialize<DiscordExport>(json, JsonOptions);
if (export == null) continue;
var fileAdded = 0;
foreach (var message in export.Messages)
{
foreach (var reactionInfo in message.Reactions)
{
// Exports without per-user reaction data carry no rows we can create.
if (reactionInfo.Users == null || reactionInfo.Users.Count == 0) continue;
foreach (var userInfo in reactionInfo.Users)
{
// Normalize emoji name to handle potential Unicode differences
var normalizedEmojiName = reactionInfo.Emoji.Name.Normalize();
var key = (message.Id, normalizedEmojiName, userInfo.Id);
if (existingReactions.Contains(key)) continue;
// Ensure user exists
// FindAsync checks the change tracker first, so users added earlier
// in this pass (but not yet saved) are found and not duplicated.
var user = await _context.Users.FindAsync(userInfo.Id);
if (user == null)
{
user = new User
{
Id = userInfo.Id,
Name = userInfo.Name,
Discriminator = userInfo.Discriminator,
IsBot = userInfo.IsBot
};
_context.Users.Add(user);
}
var reaction = new Reaction
{
MessageId = message.Id,
EmojiCode = reactionInfo.Emoji.Code,
EmojiName = normalizedEmojiName,
UserId = userInfo.Id
};
_context.Reactions.Add(reaction);
// Record the key so the same triple in a later file is skipped.
existingReactions.Add(key);
fileAdded++;
totalAdded++;
}
}
}
if (fileAdded > 0)
{
// One save per file keeps a failure's blast radius to that file.
await _context.SaveChangesAsync();
_logger.LogDebug("Added {Count} reactions from {Path}", fileAdded, jsonFilePath);
}
}
catch (Exception ex)
{
// Skip the bad file; clear tracked entities so the failed batch does not
// get re-saved (and re-fail) with the next file's SaveChangesAsync.
_logger.LogWarning(ex, "Failed to reprocess file: {Path}", jsonFilePath);
_context.ChangeTracker.Clear();
}
}
return totalAdded;
}
private async Task ProcessMentionAsync(MentionInfo mentionInfo, string messageId)