commit 2633bbf37a15c9d2692d6e1d07cca218c611b9df Author: AJ Isaacs Date: Tue Jan 20 12:26:38 2026 -0500 Initial commit Add Discord Archive Manager project with: - Entity Framework Core data models for Discord exports - JSON import service for processing Discord chat exports - Archive service for managing imported data - Docker configuration for containerized deployment Co-Authored-By: Claude Opus 4.5 diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..255b314 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,7 @@ +{ + "permissions": { + "allow": [ + "Bash(git reset:*)" + ] + } +} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..69a1f0d --- /dev/null +++ b/.gitignore @@ -0,0 +1,31 @@ +# Environment files +.env +.env.* +*.local + +# Build results +bin/ +obj/ +[Dd]ebug/ +[Rr]elease/ +x64/ +x86/ + +# Visual Studio +.vs/ +*.suo +*.user +*.userosscache +*.sln.docstates + +# NuGet +*.nupkg +**/packages/* +!**/packages/build/ + +# User-specific files +*.rsuser +*.suo +*.user +*.userosscache +*.sln.docstates diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..b5424f4 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,22 @@ +FROM mcr.microsoft.com/dotnet/sdk:8.0 AS build +WORKDIR /src + +# Copy csproj and restore +COPY src/DiscordArchiveManager/DiscordArchiveManager.csproj src/DiscordArchiveManager/ +RUN dotnet restore src/DiscordArchiveManager/DiscordArchiveManager.csproj + +# Copy everything else and build +COPY . . +WORKDIR /src/src/DiscordArchiveManager +RUN dotnet publish -c Release -o /app/publish + +# Runtime image +FROM mcr.microsoft.com/dotnet/runtime:8.0 +WORKDIR /app + +# Create directories for volumes +RUN mkdir -p /app/input /app/archive /app/images + +COPY --from=build /app/publish . 
+ +ENTRYPOINT ["dotnet", "DiscordArchiveManager.dll"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..3ca2728 --- /dev/null +++ b/README.md @@ -0,0 +1,159 @@ +# Discord Archive Manager + +A .NET 8 console application that parses DiscordChatExporter JSON exports and stores them in MSSQL with content-hashed image storage. + +## Features + +- Parses DiscordChatExporter JSON exports +- Stores messages, users, channels, attachments, embeds, and reactions in MSSQL +- Content-addressed image storage using SHA256 hashing (deduplicates identical files) +- Tracks user profile changes over time via snapshots +- Archives processed JSON files +- Idempotent processing (skips already-processed files) + +## Project Structure + +``` +DiscordArchiveManager/ +├── src/DiscordArchiveManager/ +│ ├── Program.cs # Entry point +│ ├── appsettings.json # Configuration +│ ├── Models/ +│ │ ├── DiscordExport.cs # JSON deserialization models +│ │ └── Entities/ # EF Core entities +│ ├── Data/ +│ │ └── DiscordArchiveContext.cs +│ └── Services/ +│ ├── JsonImportService.cs +│ ├── ImageHashService.cs +│ └── ArchiveService.cs +├── Dockerfile +├── docker-compose.yml +└── README.md +``` + +## Database Schema + +- **Guilds**: Discord servers +- **Channels**: Text channels within guilds +- **Users**: Discord users (basic info) +- **UserSnapshots**: Historical user profile data (nickname, color, avatar) +- **Messages**: Chat messages +- **Attachments**: Files attached to messages (stored with content hash) +- **Embeds**: Rich embeds in messages +- **Reactions**: Emoji reactions on messages +- **Mentions**: User mentions in messages +- **ProcessedFiles**: Tracking for imported files + +## Image Storage + +Images are stored using a content-addressed system: + +1. Calculate SHA256 hash of the file +2. 
Store at `/images/{hash[0:2]}/{hash[2:4]}/{hash}.{ext}` + +Example: A file with hash `a1b2c3d4e5f6...` and extension `.png` is stored at: +``` +/images/a1/b2/a1b2c3d4e5f6....png +``` + +Benefits: +- Automatic deduplication (identical files share storage) +- Even distribution across directories +- Fast lookup by hash + +## Configuration + +### appsettings.json + +```json +{ + "ConnectionStrings": { + "Discord": "Server=192.168.10.99;Database=DiscordArchive;User Id=sa;Password=YourPassword;TrustServerCertificate=true" + }, + "Paths": { + "InputDirectory": "/app/input", + "ArchiveDirectory": "/app/archive", + "ImageDirectory": "/app/images" + } +} +``` + +### Environment Variables + +Configuration can also be set via environment variables: +- `ConnectionStrings__Discord`: Database connection string +- `Paths__InputDirectory`: Directory to scan for JSON files +- `Paths__ArchiveDirectory`: Directory to move processed files +- `Paths__ImageDirectory`: Directory for content-hashed images + +## Usage + +### With Docker Compose + +1. Create input/archive/images directories: + ```bash + mkdir -p input archive images + ``` + +2. Place DiscordChatExporter JSON exports in the `input` directory + +3. Set the connection string in a `.env` file next to `docker-compose.yml` (for example `ConnectionStrings__Discord=<your connection string>`); the compose file loads it through `env_file` + +4. Build and run: + ```bash + docker compose build + docker compose up + ``` + +### Without Docker + +1. Ensure .NET 8 SDK is installed + +2. Update `appsettings.json` with your configuration + +3. Build and run: + ```bash + cd src/DiscordArchiveManager + dotnet run + ``` + +## DiscordChatExporter Export Format + +This tool expects JSON exports from [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter). 
+ +When exporting, ensure: +- Format: JSON +- "Download assets" is enabled (for local attachment storage) + +The tool expects the `_Files` directory to be alongside the JSON file: +``` +exports/ +├── general-2024-01-15.json +└── general-2024-01-15.json_Files/ + ├── attachment1.png + └── avatar123.webp +``` + +## Processing Flow + +1. Scan input directory for `*.json` files +2. For each unprocessed file: + - Parse JSON into model objects + - Upsert Guild and Channel (idempotent) + - Upsert Users and create snapshots for profile changes + - Insert Messages (skip if ID exists) + - Process attachments: + - Calculate SHA256 hash + - Copy to content-hashed location if new + - Reference existing path if duplicate + - Process embeds, reactions, and mentions +3. Archive JSON file and `_Files` folder +4. Record in ProcessedFiles table + +## Re-running + +The tool is safe to run multiple times: +- Already-processed files are skipped (tracked in ProcessedFiles table) +- Existing messages are not duplicated (checked by Discord message ID) +- Duplicate images are not re-copied (checked by content hash) diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..c88bcfd --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,11 @@ +services: + discord-archive: + build: . 
+ volumes: + - ./input:/app/input + - ./archive:/app/archive + - ./images:/app/images + env_file: + - .env + extra_hosts: + - "host.docker.internal:host-gateway" diff --git a/src/DiscordArchiveManager.sln b/src/DiscordArchiveManager.sln new file mode 100644 index 0000000..19227fe --- /dev/null +++ b/src/DiscordArchiveManager.sln @@ -0,0 +1,22 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.12.35514.174 d17.12 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DiscordArchiveManager", "DiscordArchiveManager\DiscordArchiveManager.csproj", "{7EFE3664-7102-411F-A8FE-91072682F192}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {7EFE3664-7102-411F-A8FE-91072682F192}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {7EFE3664-7102-411F-A8FE-91072682F192}.Debug|Any CPU.Build.0 = Debug|Any CPU + {7EFE3664-7102-411F-A8FE-91072682F192}.Release|Any CPU.ActiveCfg = Release|Any CPU + {7EFE3664-7102-411F-A8FE-91072682F192}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/src/DiscordArchiveManager/Data/DiscordArchiveContext.cs b/src/DiscordArchiveManager/Data/DiscordArchiveContext.cs new file mode 100644 index 0000000..aec49aa --- /dev/null +++ b/src/DiscordArchiveManager/Data/DiscordArchiveContext.cs @@ -0,0 +1,140 @@ +using DiscordArchiveManager.Models.Entities; +using Microsoft.EntityFrameworkCore; + +namespace DiscordArchiveManager.Data; + +public class DiscordArchiveContext : DbContext +{ + public DiscordArchiveContext(DbContextOptions options) : base(options) + { + } + + public DbSet Guilds => Set(); + public DbSet Channels => Set(); + public DbSet Users 
=> Set(); + public DbSet UserSnapshots => Set(); + public DbSet Messages => Set(); + public DbSet Attachments => Set(); + public DbSet Embeds => Set(); + public DbSet Reactions => Set(); + public DbSet Mentions => Set(); + public DbSet ProcessedFiles => Set(); + + protected override void OnModelCreating(ModelBuilder modelBuilder) + { + base.OnModelCreating(modelBuilder); + + // Guild configuration + modelBuilder.Entity(entity => + { + entity.HasKey(e => e.Id); + entity.Property(e => e.Id).ValueGeneratedNever(); + }); + + // Channel configuration + modelBuilder.Entity(entity => + { + entity.HasKey(e => e.Id); + entity.Property(e => e.Id).ValueGeneratedNever(); + entity.HasOne(e => e.Guild) + .WithMany(g => g.Channels) + .HasForeignKey(e => e.GuildId) + .OnDelete(DeleteBehavior.Cascade); + entity.HasIndex(e => e.GuildId); + }); + + // User configuration + modelBuilder.Entity(entity => + { + entity.HasKey(e => e.Id); + entity.Property(e => e.Id).ValueGeneratedNever(); + }); + + // UserSnapshot configuration + modelBuilder.Entity(entity => + { + entity.HasKey(e => e.Id); + entity.HasOne(e => e.User) + .WithMany(u => u.Snapshots) + .HasForeignKey(e => e.UserId) + .OnDelete(DeleteBehavior.Cascade); + entity.HasIndex(e => e.UserId); + entity.HasIndex(e => new { e.UserId, e.AvatarHash, e.Nickname, e.Color }); + }); + + // Message configuration + modelBuilder.Entity(entity => + { + entity.HasKey(e => e.Id); + entity.Property(e => e.Id).ValueGeneratedNever(); + entity.HasOne(e => e.Channel) + .WithMany(c => c.Messages) + .HasForeignKey(e => e.ChannelId) + .OnDelete(DeleteBehavior.Cascade); + entity.HasOne(e => e.Author) + .WithMany(u => u.Messages) + .HasForeignKey(e => e.AuthorId) + .OnDelete(DeleteBehavior.Restrict); + entity.HasIndex(e => e.ChannelId); + entity.HasIndex(e => e.AuthorId); + entity.HasIndex(e => e.Timestamp); + }); + + // Attachment configuration + modelBuilder.Entity(entity => + { + entity.HasKey(e => e.Id); + entity.HasOne(e => e.Message) + .WithMany(m => 
m.Attachments) + .HasForeignKey(e => e.MessageId) + .OnDelete(DeleteBehavior.Cascade); + entity.HasIndex(e => e.MessageId); + entity.HasIndex(e => e.ContentHash); + }); + + // Embed configuration + modelBuilder.Entity(entity => + { + entity.HasKey(e => e.Id); + entity.HasOne(e => e.Message) + .WithMany(m => m.Embeds) + .HasForeignKey(e => e.MessageId) + .OnDelete(DeleteBehavior.Cascade); + entity.HasIndex(e => e.MessageId); + }); + + // Reaction configuration + modelBuilder.Entity(entity => + { + entity.HasKey(e => e.Id); + entity.HasOne(e => e.Message) + .WithMany(m => m.Reactions) + .HasForeignKey(e => e.MessageId) + .OnDelete(DeleteBehavior.Cascade); + entity.HasIndex(e => e.MessageId); + }); + + // Mention configuration + modelBuilder.Entity(entity => + { + entity.HasKey(e => e.Id); + entity.HasOne(e => e.Message) + .WithMany(m => m.Mentions) + .HasForeignKey(e => e.MessageId) + .OnDelete(DeleteBehavior.Cascade); + entity.HasOne(e => e.User) + .WithMany(u => u.Mentions) + .HasForeignKey(e => e.UserId) + .OnDelete(DeleteBehavior.Restrict); + entity.HasIndex(e => e.MessageId); + entity.HasIndex(e => e.UserId); + }); + + // ProcessedFile configuration + modelBuilder.Entity(entity => + { + entity.HasKey(e => e.Id); + entity.HasIndex(e => e.FilePath).IsUnique(); + }); + } +} diff --git a/src/DiscordArchiveManager/DiscordArchiveManager.csproj b/src/DiscordArchiveManager/DiscordArchiveManager.csproj new file mode 100644 index 0000000..7a5945a --- /dev/null +++ b/src/DiscordArchiveManager/DiscordArchiveManager.csproj @@ -0,0 +1,25 @@ + + + + Exe + net8.0 + enable + enable + + + + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + + + + + PreserveNewest + + + + diff --git a/src/DiscordArchiveManager/Models/DiscordExport.cs b/src/DiscordArchiveManager/Models/DiscordExport.cs new file mode 100644 index 0000000..609613c --- /dev/null +++ b/src/DiscordArchiveManager/Models/DiscordExport.cs @@ -0,0 +1,150 @@ +using 
System.Text.Json.Serialization; + +namespace DiscordArchiveManager.Models; + +/// +/// Root object for DiscordChatExporter JSON exports +/// +public record DiscordExport( + [property: JsonPropertyName("guild")] GuildInfo Guild, + [property: JsonPropertyName("channel")] ChannelInfo Channel, + [property: JsonPropertyName("dateRange")] DateRangeInfo? DateRange, + [property: JsonPropertyName("exportedAt")] DateTime ExportedAt, + [property: JsonPropertyName("messages")] List Messages, + [property: JsonPropertyName("messageCount")] int MessageCount +); + +public record GuildInfo( + [property: JsonPropertyName("id")] string Id, + [property: JsonPropertyName("name")] string Name, + [property: JsonPropertyName("iconUrl")] string? IconUrl +); + +public record ChannelInfo( + [property: JsonPropertyName("id")] string Id, + [property: JsonPropertyName("type")] string Type, + [property: JsonPropertyName("categoryId")] string? CategoryId, + [property: JsonPropertyName("category")] string? Category, + [property: JsonPropertyName("name")] string Name, + [property: JsonPropertyName("topic")] string? Topic +); + +public record DateRangeInfo( + [property: JsonPropertyName("after")] DateTime? After, + [property: JsonPropertyName("before")] DateTime? Before +); + +public record MessageInfo( + [property: JsonPropertyName("id")] string Id, + [property: JsonPropertyName("type")] string Type, + [property: JsonPropertyName("timestamp")] DateTime Timestamp, + [property: JsonPropertyName("timestampEdited")] DateTime? TimestampEdited, + [property: JsonPropertyName("callEndedTimestamp")] DateTime? 
CallEndedTimestamp, + [property: JsonPropertyName("isPinned")] bool IsPinned, + [property: JsonPropertyName("content")] string Content, + [property: JsonPropertyName("author")] AuthorInfo Author, + [property: JsonPropertyName("attachments")] List Attachments, + [property: JsonPropertyName("embeds")] List Embeds, + [property: JsonPropertyName("stickers")] List Stickers, + [property: JsonPropertyName("reactions")] List Reactions, + [property: JsonPropertyName("mentions")] List Mentions, + [property: JsonPropertyName("reference")] ReferenceInfo? Reference +); + +public record AuthorInfo( + [property: JsonPropertyName("id")] string Id, + [property: JsonPropertyName("name")] string Name, + [property: JsonPropertyName("discriminator")] string Discriminator, + [property: JsonPropertyName("nickname")] string? Nickname, + [property: JsonPropertyName("color")] string? Color, + [property: JsonPropertyName("isBot")] bool IsBot, + [property: JsonPropertyName("roles")] List Roles, + [property: JsonPropertyName("avatarUrl")] string? AvatarUrl +); + +public record RoleInfo( + [property: JsonPropertyName("id")] string Id, + [property: JsonPropertyName("name")] string Name, + [property: JsonPropertyName("color")] string? Color, + [property: JsonPropertyName("position")] int Position +); + +public record AttachmentInfo( + [property: JsonPropertyName("id")] string Id, + [property: JsonPropertyName("url")] string Url, + [property: JsonPropertyName("fileName")] string FileName, + [property: JsonPropertyName("fileSizeBytes")] long FileSizeBytes, + [property: JsonPropertyName("width")] int? Width, + [property: JsonPropertyName("height")] int? Height +); + +public record EmbedInfo( + [property: JsonPropertyName("title")] string? Title, + [property: JsonPropertyName("url")] string? Url, + [property: JsonPropertyName("timestamp")] DateTime? Timestamp, + [property: JsonPropertyName("description")] string? Description, + [property: JsonPropertyName("color")] string? 
Color, + [property: JsonPropertyName("author")] EmbedAuthorInfo? Author, + [property: JsonPropertyName("thumbnail")] EmbedImageInfo? Thumbnail, + [property: JsonPropertyName("image")] EmbedImageInfo? Image, + [property: JsonPropertyName("footer")] EmbedFooterInfo? Footer, + [property: JsonPropertyName("fields")] List? Fields +); + +public record EmbedAuthorInfo( + [property: JsonPropertyName("name")] string? Name, + [property: JsonPropertyName("url")] string? Url, + [property: JsonPropertyName("iconUrl")] string? IconUrl +); + +public record EmbedImageInfo( + [property: JsonPropertyName("url")] string? Url, + [property: JsonPropertyName("width")] int? Width, + [property: JsonPropertyName("height")] int? Height +); + +public record EmbedFooterInfo( + [property: JsonPropertyName("text")] string? Text, + [property: JsonPropertyName("iconUrl")] string? IconUrl +); + +public record EmbedFieldInfo( + [property: JsonPropertyName("name")] string Name, + [property: JsonPropertyName("value")] string Value, + [property: JsonPropertyName("isInline")] bool IsInline +); + +public record StickerInfo( + [property: JsonPropertyName("id")] string Id, + [property: JsonPropertyName("name")] string Name, + [property: JsonPropertyName("format")] string Format, + [property: JsonPropertyName("sourceUrl")] string? SourceUrl +); + +public record ReactionInfo( + [property: JsonPropertyName("emoji")] EmojiInfo Emoji, + [property: JsonPropertyName("count")] int Count, + [property: JsonPropertyName("users")] List? Users +); + +public record EmojiInfo( + [property: JsonPropertyName("id")] string? Id, + [property: JsonPropertyName("name")] string Name, + [property: JsonPropertyName("code")] string? Code, + [property: JsonPropertyName("isAnimated")] bool IsAnimated, + [property: JsonPropertyName("imageUrl")] string? 
ImageUrl +); + +public record MentionInfo( + [property: JsonPropertyName("id")] string Id, + [property: JsonPropertyName("name")] string Name, + [property: JsonPropertyName("discriminator")] string Discriminator, + [property: JsonPropertyName("nickname")] string? Nickname, + [property: JsonPropertyName("isBot")] bool IsBot +); + +public record ReferenceInfo( + [property: JsonPropertyName("messageId")] string? MessageId, + [property: JsonPropertyName("channelId")] string? ChannelId, + [property: JsonPropertyName("guildId")] string? GuildId +); diff --git a/src/DiscordArchiveManager/Models/Entities/Attachment.cs b/src/DiscordArchiveManager/Models/Entities/Attachment.cs new file mode 100644 index 0000000..c294b34 --- /dev/null +++ b/src/DiscordArchiveManager/Models/Entities/Attachment.cs @@ -0,0 +1,36 @@ +using System.ComponentModel.DataAnnotations; +using System.ComponentModel.DataAnnotations.Schema; + +namespace DiscordArchiveManager.Models.Entities; + +public class Attachment +{ + [Key] + public int Id { get; set; } + + [Required] + [MaxLength(32)] + public string MessageId { get; set; } = null!; + + [Required] + [MaxLength(512)] + public string OriginalFilename { get; set; } = null!; + + [Required] + [MaxLength(128)] + public string ContentHash { get; set; } = null!; + + [Required] + [MaxLength(512)] + public string StoragePath { get; set; } = null!; + + public long FileSize { get; set; } + + public int? Width { get; set; } + + public int? 
Height { get; set; } + + // Navigation properties + [ForeignKey(nameof(MessageId))] + public Message Message { get; set; } = null!; +} diff --git a/src/DiscordArchiveManager/Models/Entities/Channel.cs b/src/DiscordArchiveManager/Models/Entities/Channel.cs new file mode 100644 index 0000000..2f27093 --- /dev/null +++ b/src/DiscordArchiveManager/Models/Entities/Channel.cs @@ -0,0 +1,35 @@ +using System.ComponentModel.DataAnnotations; +using System.ComponentModel.DataAnnotations.Schema; + +namespace DiscordArchiveManager.Models.Entities; + +public class Channel +{ + [Key] + [MaxLength(32)] + public string Id { get; set; } = null!; + + [Required] + [MaxLength(32)] + public string GuildId { get; set; } = null!; + + [Required] + [MaxLength(64)] + public string Type { get; set; } = null!; + + [MaxLength(256)] + public string? Category { get; set; } + + [Required] + [MaxLength(256)] + public string Name { get; set; } = null!; + + [MaxLength(1024)] + public string? Topic { get; set; } + + // Navigation properties + [ForeignKey(nameof(GuildId))] + public Guild Guild { get; set; } = null!; + + public ICollection Messages { get; set; } = new List(); +} diff --git a/src/DiscordArchiveManager/Models/Entities/Embed.cs b/src/DiscordArchiveManager/Models/Entities/Embed.cs new file mode 100644 index 0000000..fe5118b --- /dev/null +++ b/src/DiscordArchiveManager/Models/Entities/Embed.cs @@ -0,0 +1,32 @@ +using System.ComponentModel.DataAnnotations; +using System.ComponentModel.DataAnnotations.Schema; + +namespace DiscordArchiveManager.Models.Entities; + +public class Embed +{ + [Key] + public int Id { get; set; } + + [Required] + [MaxLength(32)] + public string MessageId { get; set; } = null!; + + [MaxLength(512)] + public string? Title { get; set; } + + [MaxLength(2048)] + public string? Url { get; set; } + + public string? Description { get; set; } + + [MaxLength(16)] + public string? Color { get; set; } + + [MaxLength(512)] + public string? 
ThumbnailPath { get; set; } + + // Navigation properties + [ForeignKey(nameof(MessageId))] + public Message Message { get; set; } = null!; +} diff --git a/src/DiscordArchiveManager/Models/Entities/Guild.cs b/src/DiscordArchiveManager/Models/Entities/Guild.cs new file mode 100644 index 0000000..bfad466 --- /dev/null +++ b/src/DiscordArchiveManager/Models/Entities/Guild.cs @@ -0,0 +1,20 @@ +using System.ComponentModel.DataAnnotations; + +namespace DiscordArchiveManager.Models.Entities; + +public class Guild +{ + [Key] + [MaxLength(32)] + public string Id { get; set; } = null!; + + [Required] + [MaxLength(256)] + public string Name { get; set; } = null!; + + [MaxLength(512)] + public string? IconPath { get; set; } + + // Navigation properties + public ICollection Channels { get; set; } = new List(); +} diff --git a/src/DiscordArchiveManager/Models/Entities/Mention.cs b/src/DiscordArchiveManager/Models/Entities/Mention.cs new file mode 100644 index 0000000..c594de0 --- /dev/null +++ b/src/DiscordArchiveManager/Models/Entities/Mention.cs @@ -0,0 +1,25 @@ +using System.ComponentModel.DataAnnotations; +using System.ComponentModel.DataAnnotations.Schema; + +namespace DiscordArchiveManager.Models.Entities; + +public class Mention +{ + [Key] + public int Id { get; set; } + + [Required] + [MaxLength(32)] + public string MessageId { get; set; } = null!; + + [Required] + [MaxLength(32)] + public string UserId { get; set; } = null!; + + // Navigation properties + [ForeignKey(nameof(MessageId))] + public Message Message { get; set; } = null!; + + [ForeignKey(nameof(UserId))] + public User User { get; set; } = null!; +} diff --git a/src/DiscordArchiveManager/Models/Entities/Message.cs b/src/DiscordArchiveManager/Models/Entities/Message.cs new file mode 100644 index 0000000..23518ef --- /dev/null +++ b/src/DiscordArchiveManager/Models/Entities/Message.cs @@ -0,0 +1,46 @@ +using System.ComponentModel.DataAnnotations; +using System.ComponentModel.DataAnnotations.Schema; + +namespace 
DiscordArchiveManager.Models.Entities; + +public class Message +{ + [Key] + [MaxLength(32)] + public string Id { get; set; } = null!; + + [Required] + [MaxLength(32)] + public string ChannelId { get; set; } = null!; + + [Required] + [MaxLength(32)] + public string AuthorId { get; set; } = null!; + + [Required] + [MaxLength(64)] + public string Type { get; set; } = null!; + + public DateTime Timestamp { get; set; } + + public DateTime? TimestampEdited { get; set; } + + public bool IsPinned { get; set; } + + public string Content { get; set; } = string.Empty; + + [MaxLength(32)] + public string? ReferenceMessageId { get; set; } + + // Navigation properties + [ForeignKey(nameof(ChannelId))] + public Channel Channel { get; set; } = null!; + + [ForeignKey(nameof(AuthorId))] + public User Author { get; set; } = null!; + + public ICollection Attachments { get; set; } = new List(); + public ICollection Embeds { get; set; } = new List(); + public ICollection Reactions { get; set; } = new List(); + public ICollection Mentions { get; set; } = new List(); +} diff --git a/src/DiscordArchiveManager/Models/Entities/ProcessedFile.cs b/src/DiscordArchiveManager/Models/Entities/ProcessedFile.cs new file mode 100644 index 0000000..c775043 --- /dev/null +++ b/src/DiscordArchiveManager/Models/Entities/ProcessedFile.cs @@ -0,0 +1,18 @@ +using System.ComponentModel.DataAnnotations; + +namespace DiscordArchiveManager.Models.Entities; + +public class ProcessedFile +{ + [Key] + public int Id { get; set; } + + [Required] + [MaxLength(1024)] + public string FilePath { get; set; } = null!; + + public DateTime ProcessedAt { get; set; } + + [MaxLength(1024)] + public string? 
ArchivePath { get; set; } +} diff --git a/src/DiscordArchiveManager/Models/Entities/Reaction.cs b/src/DiscordArchiveManager/Models/Entities/Reaction.cs new file mode 100644 index 0000000..9eaa126 --- /dev/null +++ b/src/DiscordArchiveManager/Models/Entities/Reaction.cs @@ -0,0 +1,27 @@ +using System.ComponentModel.DataAnnotations; +using System.ComponentModel.DataAnnotations.Schema; + +namespace DiscordArchiveManager.Models.Entities; + +public class Reaction +{ + [Key] + public int Id { get; set; } + + [Required] + [MaxLength(32)] + public string MessageId { get; set; } = null!; + + [MaxLength(128)] + public string? EmojiCode { get; set; } + + [Required] + [MaxLength(256)] + public string EmojiName { get; set; } = null!; + + public int Count { get; set; } + + // Navigation properties + [ForeignKey(nameof(MessageId))] + public Message Message { get; set; } = null!; +} diff --git a/src/DiscordArchiveManager/Models/Entities/User.cs b/src/DiscordArchiveManager/Models/Entities/User.cs new file mode 100644 index 0000000..1ca4a48 --- /dev/null +++ b/src/DiscordArchiveManager/Models/Entities/User.cs @@ -0,0 +1,25 @@ +using System.ComponentModel.DataAnnotations; + +namespace DiscordArchiveManager.Models.Entities; + +public class User +{ + [Key] + [MaxLength(32)] + public string Id { get; set; } = null!; + + [Required] + [MaxLength(256)] + public string Name { get; set; } = null!; + + [Required] + [MaxLength(8)] + public string Discriminator { get; set; } = null!; + + public bool IsBot { get; set; } + + // Navigation properties + public ICollection Snapshots { get; set; } = new List(); + public ICollection Messages { get; set; } = new List(); + public ICollection Mentions { get; set; } = new List(); +} diff --git a/src/DiscordArchiveManager/Models/Entities/UserSnapshot.cs b/src/DiscordArchiveManager/Models/Entities/UserSnapshot.cs new file mode 100644 index 0000000..3c99dca --- /dev/null +++ b/src/DiscordArchiveManager/Models/Entities/UserSnapshot.cs @@ -0,0 +1,29 @@ +using 
System.ComponentModel.DataAnnotations; +using System.ComponentModel.DataAnnotations.Schema; + +namespace DiscordArchiveManager.Models.Entities; + +public class UserSnapshot +{ + [Key] + public int Id { get; set; } + + [Required] + [MaxLength(32)] + public string UserId { get; set; } = null!; + + [MaxLength(256)] + public string? Nickname { get; set; } + + [MaxLength(16)] + public string? Color { get; set; } + + [MaxLength(128)] + public string? AvatarHash { get; set; } + + public DateTime CapturedAt { get; set; } + + // Navigation properties + [ForeignKey(nameof(UserId))] + public User User { get; set; } = null!; +} diff --git a/src/DiscordArchiveManager/Program.cs b/src/DiscordArchiveManager/Program.cs new file mode 100644 index 0000000..b7e606c --- /dev/null +++ b/src/DiscordArchiveManager/Program.cs @@ -0,0 +1,100 @@ +using DiscordArchiveManager.Data; +using DiscordArchiveManager.Services; +using Microsoft.EntityFrameworkCore; +using Microsoft.Extensions.Configuration; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Hosting; +using Microsoft.Extensions.Logging; + +var builder = Host.CreateApplicationBuilder(args); + +// Add configuration +builder.Configuration + .SetBasePath(Directory.GetCurrentDirectory()) + .AddJsonFile("appsettings.json", optional: false) + .AddEnvironmentVariables(); + +// Configure services +builder.Services.AddDbContext(options => + options.UseSqlServer(builder.Configuration.GetConnectionString("Discord"))); + +builder.Services.AddTransient(); +builder.Services.AddTransient(); +builder.Services.AddTransient(); + +var host = builder.Build(); + +// Get configuration +var config = host.Services.GetRequiredService(); +var logger = host.Services.GetRequiredService>(); + +var inputDirectory = config["Paths:InputDirectory"] ?? "/app/input"; +var archiveDirectory = config["Paths:ArchiveDirectory"] ?? "/app/archive"; +var imageDirectory = config["Paths:ImageDirectory"] ?? 
"/app/images"; + +logger.LogInformation("Discord Archive Manager starting..."); +logger.LogInformation("Input directory: {Path}", inputDirectory); +logger.LogInformation("Archive directory: {Path}", archiveDirectory); +logger.LogInformation("Image directory: {Path}", imageDirectory); + +// Ensure directories exist +Directory.CreateDirectory(inputDirectory); +Directory.CreateDirectory(archiveDirectory); +Directory.CreateDirectory(imageDirectory); + +// Initialize database +using (var scope = host.Services.CreateScope()) +{ + var context = scope.ServiceProvider.GetRequiredService(); + logger.LogInformation("Ensuring database exists and applying migrations..."); + await context.Database.EnsureCreatedAsync(); + logger.LogInformation("Database ready."); +} + +// Process files +using (var scope = host.Services.CreateScope()) +{ + var importService = scope.ServiceProvider.GetRequiredService(); + var archiveService = scope.ServiceProvider.GetRequiredService(); + + var files = archiveService.GetExportFiles(inputDirectory).ToList(); + + if (files.Count == 0) + { + logger.LogInformation("No JSON files found in input directory."); + } + else + { + logger.LogInformation("Found {Count} JSON files to process.", files.Count); + + var successCount = 0; + var skipCount = 0; + var errorCount = 0; + + foreach (var file in files) + { + try + { + var processed = await importService.ProcessFileAsync(file, imageDirectory, archiveDirectory); + if (processed) + { + successCount++; + } + else + { + skipCount++; + } + } + catch (Exception ex) + { + logger.LogError(ex, "Failed to process file: {Path}", file); + errorCount++; + } + } + + logger.LogInformation("Processing complete. 
Processed: {Success}, Skipped: {Skip}, Errors: {Error}",
+            successCount, skipCount, errorCount);
+    }
+}
+
+logger.LogInformation("Discord Archive Manager finished.");
diff --git a/src/DiscordArchiveManager/Services/ArchiveService.cs b/src/DiscordArchiveManager/Services/ArchiveService.cs
new file mode 100644
index 0000000..a96d349
--- /dev/null
+++ b/src/DiscordArchiveManager/Services/ArchiveService.cs
@@ -0,0 +1,179 @@
+using Microsoft.Extensions.Logging;
+
+namespace DiscordArchiveManager.Services;
+
+/// <summary>
+/// File-system helper for export files: enumerates JSON exports, resolves the
+/// attachment files stored next to them and moves processed exports into the archive.
+/// </summary>
+public class ArchiveService
+{
+    private readonly ILogger<ArchiveService> _logger;
+
+    public ArchiveService(ILogger<ArchiveService> logger)
+    {
+        _logger = logger;
+    }
+
+    /// <summary>
+    /// Archives a JSON file and its associated _Files directory to the archive location.
+    /// </summary>
+    /// <param name="jsonFilePath">Path to the JSON file</param>
+    /// <param name="archiveRoot">Root directory for archives</param>
+    /// <returns>Path to the archived JSON file</returns>
+    /// <exception cref="FileNotFoundException">The JSON file does not exist.</exception>
+    public string ArchiveExport(string jsonFilePath, string archiveRoot)
+    {
+        if (!File.Exists(jsonFilePath))
+        {
+            throw new FileNotFoundException($"JSON file not found: {jsonFilePath}");
+        }
+
+        var jsonFileName = Path.GetFileName(jsonFilePath);
+        var filesDirectory = GetFilesDirectoryPath(jsonFilePath);
+
+        // One archive subdirectory per local calendar day; the invariant culture keeps
+        // the folder name stable regardless of the machine's regional settings.
+        var archiveSubdir = DateTime.Now.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);
+        var archivePath = Path.Combine(archiveRoot, archiveSubdir);
+        Directory.CreateDirectory(archivePath);
+
+        // Archive the JSON file
+        var uniqueJsonPath = GetUniquePath(Path.Combine(archivePath, jsonFileName));
+        File.Move(jsonFilePath, uniqueJsonPath);
+        _logger.LogInformation("Archived JSON file to {Path}", uniqueJsonPath);
+
+        // Archive the _Files directory if it exists
+        if (Directory.Exists(filesDirectory))
+        {
+            var filesDirectoryName = Path.GetFileName(filesDirectory);
+            var uniqueFilesPath = GetUniquePath(Path.Combine(archivePath, filesDirectoryName));
+            MoveDirectory(filesDirectory, uniqueFilesPath);
+            _logger.LogInformation("Archived files directory to {Path}", uniqueFilesPath);
+        }
+
+        return uniqueJsonPath;
+    }
+
+    /// <summary>
+    /// Moves a directory tree, falling back to copy-and-delete when a plain move fails.
+    /// </summary>
+    /// <remarks>
+    /// <see cref="Directory.Move(string, string)"/> throws <see cref="IOException"/> when the
+    /// destination is on a different volume, which is the normal layout when the input and
+    /// archive directories are separate container mounts.
+    /// </remarks>
+    private static void MoveDirectory(string sourceDirectory, string destinationDirectory)
+    {
+        try
+        {
+            Directory.Move(sourceDirectory, destinationDirectory);
+            return;
+        }
+        catch (IOException) when (Directory.Exists(sourceDirectory) && !Directory.Exists(destinationDirectory))
+        {
+            // Nothing was moved; fall through to the copy-and-delete path below.
+        }
+
+        Directory.CreateDirectory(destinationDirectory);
+
+        // Recreate the directory tree first so empty subdirectories are preserved.
+        foreach (var sourceSubdirectory in Directory.EnumerateDirectories(sourceDirectory, "*", SearchOption.AllDirectories))
+        {
+            Directory.CreateDirectory(
+                Path.Combine(destinationDirectory, Path.GetRelativePath(sourceDirectory, sourceSubdirectory)));
+        }
+
+        foreach (var sourceFile in Directory.EnumerateFiles(sourceDirectory, "*", SearchOption.AllDirectories))
+        {
+            var targetFile = Path.Combine(destinationDirectory, Path.GetRelativePath(sourceDirectory, sourceFile));
+            File.Copy(sourceFile, targetFile, overwrite: true);
+        }
+
+        Directory.Delete(sourceDirectory, recursive: true);
+    }
+
+    /// <summary>
+    /// Gets the path to the _Files directory associated with a JSON export file.
+    /// DiscordChatExporter creates directories named like "filename.json_Files"
+    /// </summary>
+    public string GetFilesDirectoryPath(string jsonFilePath)
+    {
+        return jsonFilePath + "_Files";
+    }
+
+    /// <summary>
+    /// Gets the path to a specific file within the _Files directory.
+    /// </summary>
+    /// <param name="jsonFilePath">Path to the JSON export the attachment belongs to.</param>
+    /// <param name="attachmentUrl">Attachment URL (or bare path) as written in the export.</param>
+    /// <returns>
+    /// Full path of the matching file, or <c>null</c> when the _Files directory is missing,
+    /// the URL yields no file name, or no file with that name exists.
+    /// </returns>
+    public string? GetAttachmentFilePath(string jsonFilePath, string attachmentUrl)
+    {
+        var filesDirectory = GetFilesDirectoryPath(jsonFilePath);
+        if (!Directory.Exists(filesDirectory) || string.IsNullOrWhiteSpace(attachmentUrl))
+        {
+            return null;
+        }
+
+        // DiscordChatExporter stores files with their original filename
+        // The URL format is usually like: https://cdn.discordapp.com/attachments/.../filename.ext
+        string fileName;
+        try
+        {
+            fileName = Path.GetFileName(new Uri(attachmentUrl).LocalPath);
+        }
+        catch (UriFormatException)
+        {
+            // Not an absolute URL: treat the value as a plain path and take its last segment.
+            fileName = Path.GetFileName(attachmentUrl);
+        }
+
+        if (string.IsNullOrWhiteSpace(fileName))
+        {
+            return null;
+        }
+
+        // Exact name first, then the URL-decoded form of the name.
+        var decodedFileName = Uri.UnescapeDataString(fileName);
+        foreach (var candidateName in new[] { fileName, decodedFileName })
+        {
+            var candidatePath = Path.Combine(filesDirectory, candidateName);
+            if (File.Exists(candidatePath))
+            {
+                return candidatePath;
+            }
+        }
+
+        // Last resort: case-insensitive name match among files with the same extension
+        // (covers case-sensitive file systems where the stored casing differs from the URL).
+        var searchPattern = "*" + Path.GetExtension(fileName);
+        return Directory.EnumerateFiles(filesDirectory, searchPattern)
+            .FirstOrDefault(candidate =>
+            {
+                var candidateName = Path.GetFileName(candidate);
+                return candidateName.Equals(fileName, StringComparison.OrdinalIgnoreCase)
+                    || candidateName.Equals(decodedFileName, StringComparison.OrdinalIgnoreCase);
+            });
+    }
+
+    /// <summary>
+    /// Lists all JSON export files in the input directory.
+    /// </summary>
+    /// <param name="inputDirectory">Directory that is searched recursively for *.json files.</param>
+    /// <returns>
+    /// A lazily evaluated sequence of file paths; empty when the directory does not exist.
+    /// </returns>
+    public IEnumerable<string> GetExportFiles(string inputDirectory)
+    {
+        if (!Directory.Exists(inputDirectory))
+        {
+            _logger.LogWarning("Input directory does not exist: {Path}", inputDirectory);
+            yield break;
+        }
+
+        foreach (var file in Directory.EnumerateFiles(inputDirectory, "*.json", SearchOption.AllDirectories))
+        {
+            // Skip anything whose relative path has a segment starting with '.'
+            // (hidden directories, and also dot-prefixed file names).
+            var relativePath = Path.GetRelativePath(inputDirectory, file);
+            if (relativePath.Split(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar)
+                .Any(part => part.StartsWith('.')))
+            {
+                _logger.LogDebug("Skipping file in hidden directory: {Path}", file);
+                continue;
+            }
+
+            yield return file;
+        }
+    }
+
+    ///
+    /// Gets a unique file/directory path by appending a number if the path already exists.
+    ///
+    /// <param name="path">Desired file or directory path.</param>
+    /// <returns>
+    /// <paramref name="path"/> itself when nothing exists there; otherwise the first of
+    /// "name_1.ext", "name_2.ext", ... in the same directory that is not taken.
+    /// </returns>
+    private static string GetUniquePath(string path)
+    {
+        static bool IsTaken(string candidate) => File.Exists(candidate) || Directory.Exists(candidate);
+
+        if (!IsTaken(path))
+        {
+            return path;
+        }
+
+        var directory = Path.GetDirectoryName(path)!;
+        var fileName = Path.GetFileNameWithoutExtension(path);
+        var extension = Path.GetExtension(path);
+
+        // Probe name_1, name_2, ... until a free slot is found.
+        for (var counter = 1; ; counter++)
+        {
+            var candidate = Path.Combine(directory, $"{fileName}_{counter}{extension}");
+            if (!IsTaken(candidate))
+            {
+                return candidate;
+            }
+        }
+    }
+}
diff --git a/src/DiscordArchiveManager/Services/ImageHashService.cs b/src/DiscordArchiveManager/Services/ImageHashService.cs
new file mode 100644
index 0000000..8130896
--- /dev/null
+++ b/src/DiscordArchiveManager/Services/ImageHashService.cs
@@ -0,0 +1,94 @@
+using System.Security.Cryptography;
+using Microsoft.Extensions.Logging;
+
+namespace DiscordArchiveManager.Services;
+
+/// <summary>
+/// Hashes files with SHA256 and stores them under a path derived from the hash.
+/// </summary>
+public class ImageHashService
+{
+    // ILogger<T> is what the default logging registration provides for constructor injection.
+    private readonly ILogger<ImageHashService> _logger;
+
+    public ImageHashService(ILogger<ImageHashService> logger)
+    {
+        _logger = logger;
+    }
+
+    ///
+    /// Processes an image file by calculating its hash and copying it to a content-addressed storage location.
+    ///
+    /// <param name="sourcePath">Full path to the source file</param>
+    /// <param name="destRoot">Root directory for content-addressed storage</param>
+    /// <returns>Tuple containing the hash and the relative storage path from destRoot</returns>
+    /// <exception cref="FileNotFoundException">The source file does not exist.</exception>
+    public (string hash, string storagePath) ProcessImage(string sourcePath, string destRoot)
+    {
+        if (!File.Exists(sourcePath))
+        {
+            throw new FileNotFoundException($"Source file not found: {sourcePath}");
+        }
+
+        // Hash from a stream so large attachments are not loaded into memory in one piece.
+        string hashStr;
+        using (var sourceStream = File.OpenRead(sourcePath))
+        {
+            hashStr = Convert.ToHexString(SHA256.HashData(sourceStream)).ToLowerInvariant();
+        }
+
+        // Create 2-level directory structure: /ab/cd/abcdef...ext
+        var extension = Path.GetExtension(sourcePath).ToLowerInvariant();
+        var relativeDir = Path.Combine(hashStr[..2], hashStr[2..4]);
+        var relativePath = Path.Combine(relativeDir, $"{hashStr}{extension}");
+        var absolutePath = Path.Combine(destRoot, relativePath);
+
+        // Identical content maps to the same path, so an existing file means nothing to copy.
+        if (File.Exists(absolutePath))
+        {
+            _logger.LogDebug("File already exists at {Path}, skipping copy", absolutePath);
+        }
+        else
+        {
+            Directory.CreateDirectory(Path.Combine(destRoot, relativeDir));
+            File.Copy(sourcePath, absolutePath);
+            _logger.LogDebug("Copied {Source} to {Dest}", sourcePath, absolutePath);
+        }
+
+        return (hashStr, relativePath);
+    }
+
+    /// <summary>
+    /// Checks if a file with the given hash already exists in storage.
+    /// </summary>
+    /// <param name="hash">Hex hash string; its first four characters select the two directory levels.</param>
+    /// <param name="destRoot">Root directory for content-addressed storage</param>
+    /// <param name="extension">File extension appended to the hash; it is lower-cased before use.</param>
+    /// <returns><c>true</c> when the file exists under <paramref name="destRoot"/>.</returns>
+    public bool HashExists(string hash, string destRoot, string extension)
+    {
+        var level1 = hash[..2];
+        var level2 = hash[2..4];
+        var filename = $"{hash}{extension.ToLowerInvariant()}";
+        var absolutePath = Path.Combine(destRoot, level1, level2, filename);
+        return File.Exists(absolutePath);
+    }
+
+    ///
+    /// Gets the relative storage path for a given hash and extension.
+    ///
+    /// <param name="hash">Hex hash string; its first four characters select the two directory levels.</param>
+    /// <param name="extension">File extension appended to the hash; it is lower-cased before use.</param>
+    /// <returns>Relative path of the form "ab/cd/abcd...ext" (platform directory separator).</returns>
+    public string GetStoragePath(string hash, string extension)
+    {
+        var level1 = hash[..2];
+        var level2 = hash[2..4];
+        var filename = $"{hash}{extension.ToLowerInvariant()}";
+        return Path.Combine(level1, level2, filename);
+    }
+
+    /// <summary>
+    /// Calculates the SHA256 hash of a file without copying it.
+    /// </summary>
+    /// <param name="filePath">Path of the file to hash.</param>
+    /// <returns>Lower-case hexadecimal SHA256 digest of the file contents.</returns>
+    /// <exception cref="FileNotFoundException">The file does not exist.</exception>
+    public string CalculateHash(string filePath)
+    {
+        if (!File.Exists(filePath))
+        {
+            throw new FileNotFoundException($"File not found: {filePath}");
+        }
+
+        // Hash from a stream so large files are not loaded into memory in one piece.
+        using var fileStream = File.OpenRead(filePath);
+        return Convert.ToHexString(SHA256.HashData(fileStream)).ToLowerInvariant();
+    }
+}
diff --git a/src/DiscordArchiveManager/Services/JsonImportService.cs b/src/DiscordArchiveManager/Services/JsonImportService.cs
new file mode 100644
index 0000000..c323344
--- /dev/null
+++ b/src/DiscordArchiveManager/Services/JsonImportService.cs
@@ -0,0 +1,386 @@
+using System.Text.Json;
+using DiscordArchiveManager.Data;
+using DiscordArchiveManager.Models;
+using DiscordArchiveManager.Models.Entities;
+using Microsoft.EntityFrameworkCore;
+using Microsoft.Extensions.Logging;
+
+namespace DiscordArchiveManager.Services;
+
+/// <summary>
+/// Imports JSON export files into the database through <see cref="DiscordArchiveContext"/>.
+/// </summary>
+public class JsonImportService
+{
+    private readonly DiscordArchiveContext _context;
+    private readonly ImageHashService _imageHashService;
+    private readonly ArchiveService _archiveService;
+    // ILogger<T> is what the default logging registration provides for constructor injection.
+    private readonly ILogger<JsonImportService> _logger;
+
+    // Export property names are matched case-insensitively during deserialization.
+    private static readonly JsonSerializerOptions JsonOptions = new()
+    {
+        PropertyNameCaseInsensitive = true
+    };
+
+    public JsonImportService(
+        DiscordArchiveContext context,
+        ImageHashService imageHashService,
+        ArchiveService archiveService,
+        ILogger<JsonImportService> logger)
+    {
+        _context = context;
+        _imageHashService = imageHashService;
+        _archiveService = archiveService;
+        _logger = logger;
+    }
+
+    ///
+    /// Processes a single JSON export file.
+    ///
+    /// <param name="jsonFilePath">Path to the JSON export file to import.</param>
+    /// <param name="imageRoot">Root directory passed to the image store for attachment files.</param>
+    /// <param name="archiveRoot">Root directory the export is moved to after its data is saved.</param>
+    /// <returns>
+    /// <c>true</c> when the file was imported and archived; <c>false</c> when it was already
+    /// recorded as processed, deserialized to null, or could not be parsed as JSON.
+    /// </returns>
+    public async Task<bool> ProcessFileAsync(string jsonFilePath, string imageRoot, string archiveRoot)
+    {
+        _logger.LogInformation("Processing file: {Path}", jsonFilePath);
+
+        // Check if already processed
+        var normalizedPath = Path.GetFullPath(jsonFilePath);
+        if (await _context.ProcessedFiles.AnyAsync(p => p.FilePath == normalizedPath))
+        {
+            _logger.LogInformation("File already processed, skipping: {Path}", jsonFilePath);
+            return false;
+        }
+
+        try
+        {
+            // Parse JSON
+            var json = await File.ReadAllTextAsync(jsonFilePath);
+            // NOTE(review): the generic type argument is missing from this dump; DiscordExport is
+            // assumed from Models/DiscordExport.cs. Confirm the root model's name.
+            var export = JsonSerializer.Deserialize<DiscordExport>(json, JsonOptions);
+
+            if (export == null)
+            {
+                _logger.LogError("Failed to deserialize JSON file: {Path}", jsonFilePath);
+                return false;
+            }
+
+            _logger.LogInformation("Parsed {Count} messages from {Channel} in {Guild}",
+                export.MessageCount, export.Channel.Name, export.Guild.Name);
+
+            // Process in a transaction
+            await using var transaction = await _context.Database.BeginTransactionAsync();
+
+            try
+            {
+                // Upsert Guild
+                await UpsertGuildAsync(export.Guild);
+
+                // Upsert Channel
+                await UpsertChannelAsync(export.Channel, export.Guild.Id);
+
+                // Process messages
+                var processedCount = 0;
+                foreach (var message in export.Messages)
+                {
+                    if (await ProcessMessageAsync(message, export.Channel.Id, jsonFilePath, imageRoot))
+                    {
+                        processedCount++;
+                    }
+                }
+
+                _logger.LogInformation("Processed {Count} new messages", processedCount);
+
+                await _context.SaveChangesAsync();
+
+                // Archive the file.
+                // NOTE(review): the move happens before the final commit, so a failure in the
+                // statements below rolls the database back while the file stays in the archive.
+                var archivePath = _archiveService.ArchiveExport(jsonFilePath, archiveRoot);
+
+                // Record as processed
+                _context.ProcessedFiles.Add(new ProcessedFile
+                {
+                    FilePath = normalizedPath,
+                    ProcessedAt = DateTime.UtcNow,
+                    ArchivePath = archivePath
+                });
+
+                await _context.SaveChangesAsync();
+                await transaction.CommitAsync();
+
+                _logger.LogInformation("Successfully processed and archived: {Path}", jsonFilePath);
+                return true;
+            }
+            catch (Exception ex)
+            {
+                // The same context instance serves every file in the run. Without this, the
+                // entities queued for the failed file would be re-sent with the next SaveChanges.
+                _context.ChangeTracker.Clear();
+                _seenSnapshotKeys.Clear();
+
+                await transaction.RollbackAsync();
+                _logger.LogError(ex, "Error processing file, rolled back transaction: {Path}", jsonFilePath);
+                throw;
+            }
+        }
+        catch (JsonException ex)
+        {
+            _logger.LogError(ex, "Failed to parse JSON file: {Path}", jsonFilePath);
+            return false;
+        }
+    }
+
+    /// <summary>
+    /// Adds the guild if its id is unknown; otherwise refreshes its name and, when the export
+    /// supplies one, its icon. Changes are only queued on the context, not saved.
+    /// </summary>
+    private async Task UpsertGuildAsync(GuildInfo guildInfo)
+    {
+        var guild = await _context.Guilds.FindAsync(guildInfo.Id);
+        if (guild == null)
+        {
+            guild = new Guild
+            {
+                Id = guildInfo.Id,
+                Name = guildInfo.Name,
+                IconPath = guildInfo.IconUrl
+            };
+            _context.Guilds.Add(guild);
+            _logger.LogDebug("Added new guild: {Name} ({Id})", guildInfo.Name, guildInfo.Id);
+        }
+        else
+        {
+            guild.Name = guildInfo.Name;
+            // Keep the stored icon when the export carries none.
+            if (!string.IsNullOrEmpty(guildInfo.IconUrl))
+            {
+                guild.IconPath = guildInfo.IconUrl;
+            }
+            _logger.LogDebug("Updated guild: {Name} ({Id})", guildInfo.Name, guildInfo.Id);
+        }
+    }
+
+    /// <summary>
+    /// Adds the channel if its id is unknown; otherwise overwrites type, category, name and
+    /// topic with the exported values. Changes are only queued on the context, not saved.
+    /// </summary>
+    private async Task UpsertChannelAsync(ChannelInfo channelInfo, string guildId)
+    {
+        var channel = await _context.Channels.FindAsync(channelInfo.Id);
+        if (channel == null)
+        {
+            channel = new Channel
+            {
+                Id = channelInfo.Id,
+                GuildId = guildId,
+                Type = channelInfo.Type,
+                Category = channelInfo.Category,
+                Name = channelInfo.Name,
+                Topic = channelInfo.Topic
+            };
+            _context.Channels.Add(channel);
+            _logger.LogDebug("Added new channel: {Name} ({Id})", channelInfo.Name, channelInfo.Id);
+        }
+        else
+        {
+            channel.Type = channelInfo.Type;
+            channel.Category = channelInfo.Category;
+            channel.Name = channelInfo.Name;
+            channel.Topic = channelInfo.Topic;
+            _logger.LogDebug("Updated channel: {Name} ({Id})", channelInfo.Name, channelInfo.Id);
+        }
+    }
+
+    /// <summary>
+    /// Queues one message with its attachments, embeds, reactions and mentions.
+    /// </summary>
+    /// <returns><c>false</c> when a message with the same id is already stored; otherwise <c>true</c>.</returns>
+    private async Task<bool> ProcessMessageAsync(MessageInfo messageInfo, string channelId, string jsonFilePath, string imageRoot)
+    {
+        // Skip if message already exists
+        if (await _context.Messages.AnyAsync(m => m.Id == messageInfo.Id))
+        {
+            return false;
+        }
+
+        // Upsert author
+        await UpsertUserAsync(messageInfo.Author);
+
+        // Create message
+        var message = new Message
+        {
+            Id = messageInfo.Id,
+            ChannelId = channelId,
+            AuthorId = messageInfo.Author.Id,
+            Type = messageInfo.Type,
+            Timestamp = messageInfo.Timestamp,
+            TimestampEdited = messageInfo.TimestampEdited,
+            IsPinned = messageInfo.IsPinned,
+            Content = messageInfo.Content,
+            ReferenceMessageId = messageInfo.Reference?.MessageId
+        };
+        _context.Messages.Add(message);
+
+        // Process attachments
+        foreach (var attachment in messageInfo.Attachments)
+        {
+            await ProcessAttachmentAsync(attachment, messageInfo.Id, jsonFilePath, imageRoot);
+        }
+
+        // Process embeds
+        foreach (var embed in messageInfo.Embeds)
+        {
+            ProcessEmbed(embed, messageInfo.Id);
+        }
+
+        // Process reactions
+        foreach (var reaction in messageInfo.Reactions)
+        {
+            ProcessReaction(reaction, messageInfo.Id);
+        }
+
+        // Process mentions
+        foreach (var mention in messageInfo.Mentions)
+        {
+            await ProcessMentionAsync(mention, messageInfo.Id);
+        }
+
+        return true;
+    }
+
+    // Snapshot identities (user, nickname, color, avatar hash) already handled by this instance.
+    // The database query in UpsertUserAsync cannot see snapshots that are queued but not yet
+    // saved, so without this set every message of an author would queue another identical row.
+    private readonly HashSet<string> _seenSnapshotKeys = new(StringComparer.Ordinal);
+
+    /// <summary>
+    /// Adds or refreshes the user row and queues a profile snapshot when this combination of
+    /// nickname, color and avatar hash has not been recorded for the user yet.
+    /// </summary>
+    private async Task UpsertUserAsync(AuthorInfo authorInfo)
+    {
+        var user = await _context.Users.FindAsync(authorInfo.Id);
+        if (user == null)
+        {
+            user = new User
+            {
+                Id = authorInfo.Id,
+                Name = authorInfo.Name,
+                Discriminator = authorInfo.Discriminator,
+                IsBot = authorInfo.IsBot
+            };
+            _context.Users.Add(user);
+        }
+        else
+        {
+            // Update basic info if changed
+            user.Name = authorInfo.Name;
+            user.Discriminator = authorInfo.Discriminator;
+            user.IsBot = authorInfo.IsBot;
+        }
+
+        // Check if we need a new snapshot (nickname, color, or avatar changed)
+        var avatarHash = !string.IsNullOrEmpty(authorInfo.AvatarUrl)
+            ? ExtractAvatarHash(authorInfo.AvatarUrl)
+            : null;
+
+        // The unit separator keeps the joined fields from running into each other.
+        var snapshotKey = string.Join('\u001f', authorInfo.Id, authorInfo.Nickname, authorInfo.Color, avatarHash);
+        if (!_seenSnapshotKeys.Add(snapshotKey))
+        {
+            // Already stored or already queued earlier by this instance.
+            return;
+        }
+
+        var existingSnapshot = await _context.UserSnapshots
+            .Where(s => s.UserId == authorInfo.Id)
+            .Where(s => s.Nickname == authorInfo.Nickname &&
+                        s.Color == authorInfo.Color &&
+                        s.AvatarHash == avatarHash)
+            .FirstOrDefaultAsync();
+
+        if (existingSnapshot == null)
+        {
+            var snapshot = new UserSnapshot
+            {
+                UserId = authorInfo.Id,
+                Nickname = authorInfo.Nickname,
+                Color = authorInfo.Color,
+                AvatarHash = avatarHash,
+                CapturedAt = DateTime.UtcNow
+            };
+            _context.UserSnapshots.Add(snapshot);
+        }
+    }
+
+    /// <summary>
+    /// Returns the last path segment of the avatar URL without its extension,
+    /// or <c>null</c> when the value is not a valid absolute URL.
+    /// </summary>
+    private static string? ExtractAvatarHash(string avatarUrl)
+    {
+        // Avatar URLs look like: https://cdn.discordapp.com/avatars/123456/abcdef123.png
+        // We want to extract the hash part (abcdef123)
+        try
+        {
+            var segments = new Uri(avatarUrl).Segments;
+            if (segments.Length > 0)
+            {
+                var lastSegment = segments[^1].TrimEnd('/');
+                return Path.GetFileNameWithoutExtension(lastSegment);
+            }
+        }
+        catch (UriFormatException)
+        {
+            // Best effort: an unparsable avatar URL simply yields no hash.
+        }
+        return null;
+    }
+
+    /// <summary>
+    /// Queues an attachment row for the message. When the file is found next to the export it
+    /// is hashed and copied into the image store; otherwise the URL is kept as a placeholder.
+    /// </summary>
+    private async Task ProcessAttachmentAsync(AttachmentInfo attachmentInfo, string messageId, string jsonFilePath, string imageRoot)
+    {
+        // Try to find the local file
+        var localPath = _archiveService.GetAttachmentFilePath(jsonFilePath, attachmentInfo.Url);
+
+        string contentHash;
+        string storagePath;
+
+        if (localPath != null && File.Exists(localPath))
+        {
+            // Process and hash the local file
+            (contentHash, storagePath) = _imageHashService.ProcessImage(localPath, imageRoot);
+            _logger.LogDebug("Processed attachment: {Filename} -> {Path}", attachmentInfo.FileName, storagePath);
+        }
+        else
+        {
+            // File not found locally, store the URL as a placeholder
+            contentHash = "url:" + attachmentInfo.Url;
+            storagePath = attachmentInfo.Url;
+            _logger.LogWarning("Attachment file not found locally, storing URL: {Url}", attachmentInfo.Url);
+        }
+
+        // Check if this exact attachment already exists for this message
+        var existingAttachment = await _context.Attachments
+            .Where(a => a.MessageId == messageId && a.ContentHash == contentHash)
+            .FirstOrDefaultAsync();
+
+        if (existingAttachment == null)
+        {
+            var attachment = new Attachment
+            {
+                MessageId = messageId,
+                OriginalFilename = attachmentInfo.FileName,
+                ContentHash = contentHash,
+                StoragePath = storagePath,
+                FileSize = attachmentInfo.FileSizeBytes,
+                Width = attachmentInfo.Width,
+                Height = attachmentInfo.Height
+            };
+            _context.Attachments.Add(attachment);
+        }
+    }
+
+    /// <summary>
+    /// Queues an embed row for the message; the thumbnail is stored as its URL.
+    /// </summary>
+    private void ProcessEmbed(EmbedInfo embedInfo, string messageId)
+    {
+        var embed = new Embed
+        {
+            MessageId = messageId,
+            Title = embedInfo.Title,
+            Url = embedInfo.Url,
+            Description = embedInfo.Description,
+            Color = embedInfo.Color,
+            ThumbnailPath = embedInfo.Thumbnail?.Url
+        };
+        _context.Embeds.Add(embed);
+    }
+
+    /// <summary>
+    /// Queues a reaction row (emoji code, emoji name and count) for the message.
+    /// </summary>
+    private void ProcessReaction(ReactionInfo reactionInfo, string messageId)
+    {
+        var reaction = new Reaction
+        {
+            MessageId = messageId,
+            EmojiCode = reactionInfo.Emoji.Code,
+            EmojiName = reactionInfo.Emoji.Name,
+            Count = reactionInfo.Count
+        };
+        _context.Reactions.Add(reaction);
+    }
+
+    /// <summary>
+    /// Queues a mention row, first adding the mentioned user when it is not known yet.
+    /// </summary>
+    private async Task ProcessMentionAsync(MentionInfo mentionInfo, string messageId)
+    {
+        // Ensure the mentioned user exists
+        var user = await _context.Users.FindAsync(mentionInfo.Id);
+        if (user == null)
+        {
+            user = new User
+            {
+                Id = mentionInfo.Id,
+                Name = mentionInfo.Name,
+                Discriminator = mentionInfo.Discriminator,
+                IsBot = mentionInfo.IsBot
+            };
+            _context.Users.Add(user);
+        }
+
+        var mention = new Mention
+        {
+            MessageId = messageId,
+            UserId = mentionInfo.Id
+        };
+        _context.Mentions.Add(mention);
+    }
+}
diff --git a/src/DiscordArchiveManager/appsettings.json b/src/DiscordArchiveManager/appsettings.json
new file mode 100644
index 0000000..54a6d5e
--- /dev/null
+++ b/src/DiscordArchiveManager/appsettings.json
@@ -0,0 +1,16 @@
+{
+  "ConnectionStrings": {
+    "Discord": "Server=192.168.10.99;Database=DiscordArchive;User 
Id=sa;Password=Fluffy-1507;TrustServerCertificate=true" + }, + "Paths": { + "InputDirectory": "C:\\Users\\aisaacs\\Desktop\\New folder", + "ArchiveDirectory": "C:\\Users\\aisaacs\\Desktop\\New folder\\archive", + "ImageDirectory": "C:\\Users\\aisaacs\\Desktop\\New folder\\images" + }, + "Logging": { + "LogLevel": { + "Default": "Information", + "Microsoft.EntityFrameworkCore": "Warning" + } + } +}