# Origin: commit 74e05bcf3b3549c7f8fbe0bb78cae53798b6ea32
#   "Add Discord JSON to text converter script"
#   (AJ Isaacs, 2026-01-15; Co-Authored-By: Claude Opus 4.5) -- convert_discord.py
"""Convert a Discord chat export (JSON) to a simplified, human-readable text file.

Features:
- Handles both export shapes: a bare list of messages, or a dict carrying
  ``guild`` / ``channel`` / ``dateRange`` / ``messages``.
- Sorts messages chronologically (by parsed timestamp, not by raw string).
- Groups consecutive messages from the same user.
- Includes attachments, embeds, and stickers as bracketed annotations.
- Adds a date header whenever the (local) day changes.
- Prints file size reduction stats and a short preview after conversion.

Usage:
    python convert_discord.py EXPORT.json      # writes EXPORT_chat.txt
"""
import argparse
import json
import os
import re
import sys
from datetime import datetime, timezone  # timezone is unused here; kept from the original import list
from operator import itemgetter

# Prefix for a follow-up message by the same author (no timestamp / name repeated).
CONTINUATION_PREFIX = " "
# Number of characters of the result echoed to the console after conversion.
PREVIEW_CHARS = 2000

# Fractional-seconds part of an ISO-8601 timestamp (the only "." such a string contains).
_FRACTION_RE = re.compile(r"\.(\d+)")


def parse_timestamp(raw: str) -> datetime:
    """Parse an ISO-8601 timestamp string into an aware datetime in local time.

    ``datetime.fromisoformat`` before Python 3.11 rejects a trailing ``Z`` and
    fractional seconds that are not exactly 3 or 6 digits, so both are
    normalised first. A timestamp without an offset is interpreted as local
    time (that is what ``astimezone()`` does for naive values).

    Raises:
        ValueError: if *raw* is not a recognisable ISO-8601 timestamp.
    """
    text = raw.strip()
    if text[-1:] in ("Z", "z"):
        text = text[:-1] + "+00:00"
    # Pad / truncate the fraction to microseconds so every Python 3 version accepts it.
    text = _FRACTION_RE.sub(
        lambda m: "." + m.group(1)[:6].ljust(6, "0"), text, count=1
    )
    return datetime.fromisoformat(text).astimezone()


def format_message_body(msg: dict) -> str:
    """Return the message text followed by attachment/embed/sticker annotations.

    Missing, ``null`` or empty fields fall back to a generic label instead of
    crashing or printing ``None``. Returns ``""`` when the message has nothing
    to show.
    """
    parts = []
    text = (msg.get("content") or "").strip()
    if text:
        parts.append(text)
    for att in msg.get("attachments") or []:
        att_name = att.get("fileName") or att.get("filename") or "file"
        parts.append(f"[Attachment: {att_name}]")
    for emb in msg.get("embeds") or []:
        parts.append(f"[Embed: {emb.get('title') or 'embed'}]")
    for stk in msg.get("stickers") or []:
        parts.append(f"[Sticker: {stk.get('name') or 'sticker'}]")
    return " ".join(parts)


def author_name(msg: dict) -> str:
    """Return the display name of the message author, or ``"Unknown"``."""
    author = msg.get("author") or {}
    return (
        author.get("name")
        or author.get("global_name")
        or author.get("username")
        or "Unknown"
    )


def render_chat(data) -> str:
    """Render a parsed export (list of messages, or dict with metadata) as text.

    Args:
        data: Either a list of message dicts, or a dict with ``messages`` and
            optional ``guild`` / ``channel`` / ``dateRange`` metadata.

    Returns:
        The simplified chat log as a single newline-joined string.

    Raises:
        KeyError: if a message has no ``timestamp``.
        ValueError: if a timestamp cannot be parsed.
    """
    lines = []

    if isinstance(data, list):
        messages = data
    else:
        guild = (data.get("guild") or {}).get("name") or "Unknown"
        channel = (data.get("channel") or {}).get("name") or "Unknown"
        lines.append(f"[Server: {guild} | Channel: #{channel}]")
        # "after" is null when the export was made without a start-date filter.
        after = (data.get("dateRange") or {}).get("after")
        if after:
            lines.append(f"[Date: {after[:10]}]")
        lines.append("")
        messages = data.get("messages") or []

    # Sort on the parsed instant; the sort is stable, so ties keep export order.
    stamped = sorted(
        ((parse_timestamp(m["timestamp"]), m) for m in messages),
        key=itemgetter(0),
    )

    last_user = None
    last_date = None
    for ts, msg in stamped:
        content = format_message_body(msg)
        if not content:
            continue

        user = author_name(msg)
        date_str = ts.strftime("%Y-%m-%d")

        if date_str != last_date:
            if last_date:
                lines.append("")
            lines.append(f"--- {date_str} ---")
            last_date = date_str
            # A new day always restarts grouping so the first line under the
            # header carries a time and an author.
            last_user = None

        if user == last_user:
            lines.append(f"{CONTINUATION_PREFIX}{content}")
        else:
            lines.append(f"[{ts.strftime('%H:%M')}] {user}: {content}")
            last_user = user

    return "\n".join(lines)


def derive_output_path(json_path: str) -> str:
    """Return ``<json_path without extension>_chat.txt``.

    Uses ``os.path.splitext`` so that a dot in a directory name is never
    mistaken for the file extension.
    """
    return os.path.splitext(json_path)[0] + "_chat.txt"


def main(argv=None) -> None:
    """Command-line entry point: convert one export and print a summary.

    Args:
        argv: Argument list (defaults to ``sys.argv[1:]``).
    """
    parser = argparse.ArgumentParser(
        description='Convert Discord JSON export to simplified text format'
    )
    parser.add_argument('json_file', help='Path to the Discord JSON export file')
    args = parser.parse_args(argv)

    # Read once: the same text feeds both the parser and the size statistics.
    with open(args.json_file, 'r', encoding='utf-8') as f:
        raw = f.read()
    data = json.loads(raw)

    output = render_chat(data)
    output_file = derive_output_path(args.json_file)
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(output)

    original_size = len(raw)
    print(f"Original JSON: {original_size} chars")
    print(f"Simplified TXT: {len(output)} chars")
    print(f"Reduction: {100 - (len(output) / original_size * 100):.1f}%")
    print(f"Output saved to: {output_file}")
    print(f"\nFirst {PREVIEW_CHARS} chars preview:\n")
    # Only replace characters the console genuinely cannot display, instead of
    # always squeezing the preview through cp1252.
    encoding = sys.stdout.encoding or "utf-8"
    print(output[:PREVIEW_CHARS].encode(encoding, errors="replace").decode(encoding))


if __name__ == "__main__":
    main()