Files
discord-chat-to-text/convert_discord.py
AJ Isaacs 74e05bcf3b Add Discord JSON to text converter script
Converts Discord chat exports (JSON) to a simplified, human-readable
text format with the following features:
- Handles both list and dict JSON export formats
- Sorts messages chronologically
- Groups consecutive messages from the same user
- Includes attachments, embeds, and stickers as annotations
- Adds date headers when the day changes
- Shows file size reduction stats after conversion

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-15 23:51:27 -05:00

90 lines
2.9 KiB
Python

import argparse
import json
import sys
from datetime import datetime, timezone
from pathlib import Path
def _parse_timestamp(raw):
    """Parse an ISO-8601 timestamp string into a ``datetime``.

    A trailing ``Z`` (UTC designator) is rewritten to ``+00:00`` first,
    because ``datetime.fromisoformat`` rejects it before Python 3.11.
    """
    if raw.endswith(("Z", "z")):
        raw = raw[:-1] + "+00:00"
    return datetime.fromisoformat(raw)


def _message_text(msg):
    """Return a message's text followed by its attachment/embed/sticker tags.

    The stripped ``content`` (if any) comes first, then one
    ``[Attachment: ...]``, ``[Embed: ...]`` or ``[Sticker: ...]`` tag per item,
    all separated by single spaces. Returns ``""`` when the message has
    neither text nor annotations.
    """
    # ``or ""`` also covers a key that is present but null.
    text = (msg.get("content") or "").strip()
    parts = [text] if text else []

    for att in msg.get("attachments") or []:
        # Both spellings of the file-name key are accepted.
        att_name = att.get("fileName") or att.get("filename") or "file"
        parts.append(f"[Attachment: {att_name}]")

    for emb in msg.get("embeds") or []:
        # Fall back to a generic label when the title is missing, null or empty.
        emb_title = emb.get("title") or "embed"
        parts.append(f"[Embed: {emb_title}]")

    for stk in msg.get("stickers") or []:
        stk_name = stk.get("name") or "sticker"
        parts.append(f"[Sticker: {stk_name}]")

    return " ".join(parts)


def _display_name(author):
    """Return the first non-empty of name / global_name / username, else "Unknown"."""
    return (
        author.get("name")
        or author.get("global_name")
        or author.get("username")
        or "Unknown"
    )


def convert(data, tz=None):
    """Convert a parsed Discord JSON export into simplified text lines.

    Args:
        data: Either a list of message dicts, or a dict with ``guild``,
            ``channel`` and ``messages`` keys (and optionally ``dateRange``).
        tz: Time zone used for the date headers and ``HH:MM`` stamps.
            ``None`` (the default) means the system's local time zone.

    Returns:
        A list of output lines (without trailing newlines).

    Raises:
        KeyError: If a dict export lacks ``guild``/``channel``/``messages`` or
            a message lacks ``timestamp``.
        ValueError: If a timestamp is not valid ISO-8601.
    """
    lines = []

    # Handle both formats: list of messages or dict with guild/channel/messages
    if isinstance(data, list):
        messages = data
    else:
        lines.append(f"[Server: {data['guild']['name']} | Channel: #{data['channel']['name']}]")
        # The "after" bound may be null or absent; only print it when present.
        after = (data.get("dateRange") or {}).get("after")
        if after:
            lines.append(f"[Date: {after[:10]}]")
        lines.append("")
        messages = data["messages"]

    # Sort on the parsed instant rather than the raw string: lexicographic
    # order is wrong when timestamps mix "Z", fractional seconds or offsets.
    # The sort is stable, so messages with equal timestamps keep input order.
    stamped = [(_parse_timestamp(m["timestamp"]).astimezone(tz), m) for m in messages]
    stamped.sort(key=lambda pair: pair[0])

    last_user = None
    last_date = None
    for ts, msg in stamped:
        content = _message_text(msg)
        if not content:
            continue

        user = _display_name(msg.get("author") or {})
        date_str = ts.strftime("%Y-%m-%d")
        time_str = ts.strftime("%H:%M")

        # Add date header if day changed
        if date_str != last_date:
            if last_date is not None:
                lines.append("")
            lines.append(f"--- {date_str} ---")
            last_date = date_str
            # Start a fresh group so the first message under a new date
            # header always shows its time and author.
            last_user = None

        # Compact consecutive messages from same user
        if user == last_user:
            lines.append(f" {content}")
        else:
            lines.append(f"[{time_str}] {user}: {content}")
            last_user = user

    return lines


def main(argv=None):
    """Command-line entry point: convert one export and print size statistics.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description='Convert Discord JSON export to simplified text format')
    parser.add_argument('json_file', help='Path to the Discord JSON export file')
    args = parser.parse_args(argv)

    # Read the file once; the raw text is reused for the size statistics.
    source = Path(args.json_file)
    raw = source.read_text(encoding='utf-8')
    data = json.loads(raw)

    # Derive output filename from input (only the final path component's
    # extension is replaced, so dots in directory names are left alone).
    output_file = source.with_name(source.stem + '_chat.txt')

    output = "\n".join(convert(data))
    output_file.write_text(output, encoding='utf-8')

    original_size = len(raw)
    print(f"Original JSON: {original_size} chars")
    print(f"Simplified TXT: {len(output)} chars")
    print(f"Reduction: {100 - (len(output) / original_size * 100):.1f}%")
    print(f"Output saved to: {output_file}")
    print("\nFirst 2000 chars preview:\n")
    # Replace only the characters the current console cannot encode, instead
    # of forcing everything through cp1252.
    console_encoding = sys.stdout.encoding or 'utf-8'
    print(output[:2000].encode(console_encoding, errors='replace').decode(console_encoding))


if __name__ == "__main__":
    main()