Add Discord JSON to text converter script

Converts Discord chat exports (JSON) to a simplified, human-readable text format with the following features:

- Handles both list and dict JSON export formats
- Sorts messages chronologically
- Groups consecutive messages from the same user
- Includes attachments, embeds, and stickers as annotations
- Adds date headers when the day changes
- Shows file size reduction stats after conversion

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-15 23:51:27 -05:00
commit 74e05bcf3b
1 changed files with 164 additions and 0 deletions
--- a/convert_discord.py
+++ b/convert_discord.py
@@ -0,0 +1,164 @@
+"""Convert a Discord chat export (JSON) into a compact, readable text log.
+
+Accepts either a bare list of message objects or a dict that carries
+"guild", "channel", "dateRange" and "messages" keys. Messages are sorted
+chronologically, consecutive messages from one author are grouped, and
+attachments, embeds and stickers are rendered as bracketed annotations.
+The result is written next to the input as "<input stem>_chat.txt".
+"""
+import argparse
+import json
+import os
+import sys
+from datetime import datetime, timezone
+
+
+def parse_timestamp(raw):
+    """Parse an ISO-8601 timestamp string into a local-timezone datetime.
+
+    A trailing "Z" is rewritten to "+00:00" because datetime.fromisoformat
+    only accepts it natively from Python 3.11 onwards.
+    """
+    if raw.endswith("Z"):
+        raw = raw[:-1] + "+00:00"
+    return datetime.fromisoformat(raw).astimezone()
+
+
+def describe_message(msg):
+    """Return the message text followed by attachment/embed/sticker tags.
+
+    Returns an empty string when the message has neither text nor extras,
+    which the caller uses as the signal to skip it.
+    """
+    text = (msg.get("content") or "").strip()  # content may be null
+    parts = [text] if text else []
+    for att in msg.get("attachments") or []:
+        # Both key spellings ("fileName" and "filename") are accepted.
+        att_name = att.get("fileName") or att.get("filename") or "file"
+        parts.append(f"[Attachment: {att_name}]")
+    for emb in msg.get("embeds") or []:
+        # "or" (not a .get default) so a null/empty title never prints "None".
+        emb_title = emb.get("title") or "embed"
+        parts.append(f"[Embed: {emb_title}]")
+    for stk in msg.get("stickers") or []:
+        stk_name = stk.get("name") or "sticker"
+        parts.append(f"[Sticker: {stk_name}]")
+    return " ".join(parts)
+
+
+def author_name(msg):
+    """Return the best available display name for the message author."""
+    author = msg.get("author") or {}
+    return (
+        author.get("name")
+        or author.get("global_name")
+        or author.get("username")
+        or "Unknown"
+    )
+
+
+def header_lines(data):
+    """Build the server/channel/date header for a dict-style export."""
+    guild = (data.get("guild") or {}).get("name") or "Unknown"
+    channel = (data.get("channel") or {}).get("name") or "Unknown"
+    lines = [f"[Server: {guild} | Channel: #{channel}]"]
+    # The "after" bound may be null or absent; slicing None would raise.
+    after = (data.get("dateRange") or {}).get("after")
+    if after:
+        lines.append(f"[Date: {after[:10]}]")
+    lines.append("")
+    return lines
+
+
+def render_chat(data):
+    """Render parsed export data as the simplified chat text.
+
+    Args:
+        data: Either a list of message dicts or a dict with a "messages" list.
+
+    Returns:
+        The whole chat log as one newline-joined string.
+    """
+    lines = []
+    if isinstance(data, list):
+        messages = data
+    else:
+        lines.extend(header_lines(data))
+        messages = data.get("messages") or []
+
+    # Sort on parsed datetimes: comparing the raw strings misorders
+    # timestamps that carry different UTC offsets.
+    stamped = sorted(
+        ((parse_timestamp(m["timestamp"]), m) for m in messages),
+        key=lambda pair: pair[0],
+    )
+
+    last_user = None
+    last_date = None
+    for ts, msg in stamped:
+        content = describe_message(msg)
+        if not content:
+            continue
+
+        user = author_name(msg)
+        date_str = ts.strftime("%Y-%m-%d")
+        time_str = ts.strftime("%H:%M")
+
+        if date_str != last_date:
+            if last_date:
+                lines.append("")
+            lines.append(f"--- {date_str} ---")
+            last_date = date_str
+            # Restart grouping so the first message under a new date header
+            # always shows its time and author.
+            last_user = None
+
+        if user == last_user:
+            lines.append(f"       {content}")
+        else:
+            lines.append(f"[{time_str}] {user}: {content}")
+            last_user = user
+
+    return "\n".join(lines)
+
+
+def output_path_for(json_path):
+    """Return "<json_path without extension>_chat.txt".
+
+    os.path.splitext only strips a real extension of the final path
+    component, so dots in directory names are left intact.
+    """
+    return os.path.splitext(json_path)[0] + "_chat.txt"
+
+
+def main():
+    """Parse CLI arguments, convert the export, and print size statistics."""
+    parser = argparse.ArgumentParser(description='Convert Discord JSON export to simplified text format')
+    parser.add_argument('json_file', help='Path to the Discord JSON export file')
+    args = parser.parse_args()
+
+    # Read the text once; it feeds both the JSON parser and the size stats.
+    with open(args.json_file, 'r', encoding='utf-8') as f:
+        raw_json = f.read()
+    data = json.loads(raw_json)
+
+    output = render_chat(data)
+    output_file = output_path_for(args.json_file)
+    with open(output_file, 'w', encoding='utf-8') as f:
+        f.write(output)
+
+    original_size = len(raw_json)
+    print(f"Original JSON: {original_size} chars")
+    print(f"Simplified TXT: {len(output)} chars")
+    print(f"Reduction: {100 - (len(output) / original_size * 100):.1f}%")
+    print(f"Output saved to: {output_file}")
+    print("\nFirst 2000 chars preview:\n")
+    # Round-trip through the console encoding so characters it cannot show
+    # become "?" instead of raising UnicodeEncodeError.
+    console_encoding = sys.stdout.encoding or 'utf-8'
+    preview = output[:2000]
+    print(preview.encode(console_encoding, errors='replace').decode(console_encoding))
+
+
+if __name__ == "__main__":
+    main()