# discord-chat-to-text/convert_discord.py

import argparse
import json
import os
import sys
from datetime import datetime, timezone

def _parse_timestamp(raw):
    """Parse an ISO-8601 timestamp string and convert it to local time.

    The text is normalised before parsing so that it is accepted by
    ``datetime.fromisoformat`` on Python versions older than 3.11 as well:
    a trailing ``Z`` becomes ``+00:00`` and the fractional-second part is
    padded or truncated to exactly six digits.

    Args:
        raw: Timestamp text taken from a message's ``"timestamp"`` field.

    Returns:
        A timezone-aware ``datetime`` in the machine's local timezone
        (naive inputs are interpreted as local time by ``astimezone``).

    Raises:
        ValueError: If the text is not a valid ISO-8601 timestamp.
    """
    text = raw.strip()
    if text[-1:] in ("Z", "z"):
        text = text[:-1] + "+00:00"

    # The date part never contains ".", so the first dot (if any) starts
    # the fractional seconds.
    head, dot, rest = text.partition(".")
    if dot:
        digit_count = len(rest) - len(rest.lstrip("0123456789"))
        if digit_count:
            fraction = rest[:digit_count][:6].ljust(6, "0")
            text = f"{head}.{fraction}{rest[digit_count:]}"

    return datetime.fromisoformat(text).astimezone()


def _message_text(msg):
    """Build the one-line text for a message, or ``""`` if it has nothing to show.

    The stripped message content comes first, followed by one bracketed tag
    per attachment, embed and sticker, all separated by single spaces.
    Missing or null fields fall back to generic placeholder names.
    """
    body = (msg.get("content") or "").strip()
    parts = [body] if body else []

    for att in msg.get("attachments") or []:
        att_name = att.get("fileName") or att.get("filename") or "file"
        parts.append(f"[Attachment: {att_name}]")

    for emb in msg.get("embeds") or []:
        emb_title = emb.get("title") or "embed"
        parts.append(f"[Embed: {emb_title}]")

    for stk in msg.get("stickers") or []:
        stk_name = stk.get("name") or "sticker"
        parts.append(f"[Sticker: {stk_name}]")

    return " ".join(parts)


def _render_lines(data):
    """Turn parsed export data into the list of transcript lines.

    Args:
        data: Either a list of message dicts, or a dict holding ``"guild"``,
            ``"channel"``, an optional ``"dateRange"`` and ``"messages"``.

    Returns:
        The transcript as a list of strings (one per output line).

    Raises:
        KeyError: If a dict export lacks the guild/channel/messages keys, or
            a displayed message lacks ``"timestamp"``.
    """
    lines = []

    # Handle both formats: list of messages or dict with guild/channel/messages
    if isinstance(data, list):
        messages = data
    else:
        server_name = data['guild']['name']
        channel_name = data['channel']['name']
        lines.append(f"[Server: {server_name} | Channel: #{channel_name}]")
        # The "after" bound may be absent or null; only print it when set.
        after = (data.get("dateRange") or {}).get("after")
        if after:
            lines.append(f"[Date: {after[:10]}]")
        lines.append("")
        messages = data["messages"]

    # Drop empty messages first, then order by the parsed instant rather than
    # the raw string so differing offsets/precisions still sort correctly.
    entries = []
    for msg in messages:
        text = _message_text(msg)
        if text:
            entries.append((_parse_timestamp(msg["timestamp"]), msg, text))
    entries.sort(key=lambda entry: entry[0])

    last_user = None
    last_date = None

    for ts, msg, text in entries:
        author = msg.get("author") or {}
        user = (
            author.get("name")
            or author.get("global_name")
            or author.get("username")
            or "Unknown"
        )
        date_str = ts.strftime("%Y-%m-%d")
        time_str = ts.strftime("%H:%M")

        # Add date header if day changed
        if date_str != last_date:
            if last_date:
                lines.append("")
            lines.append(f"--- {date_str} ---")
            last_date = date_str
            # Start a fresh "[time] user:" line after every date header.
            last_user = None

        # Compact consecutive messages from same user
        if user == last_user:
            lines.append(f"       {text}")
        else:
            lines.append(f"[{time_str}] {user}: {text}")
            last_user = user

    return lines


def main(argv=None):
    """Convert a Discord JSON export to ``<input-stem>_chat.txt`` and print stats.

    Args:
        argv: Command-line arguments (without the program name). ``None``
            makes argparse read ``sys.argv``.
    """
    parser = argparse.ArgumentParser(description='Convert Discord JSON export to simplified text format')
    parser.add_argument('json_file', help='Path to the Discord JSON export file')
    args = parser.parse_args(argv)

    # Read once: the raw text is needed both for parsing and for the size stats.
    with open(args.json_file, 'r', encoding='utf-8') as f:
        raw_json = f.read()
    data = json.loads(raw_json)

    # Derive output filename from input; splitext only looks at the final
    # path component, so dots in directory names are left alone.
    output_file = os.path.splitext(args.json_file)[0] + '_chat.txt'

    output = "\n".join(_render_lines(data))

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(output)

    original_size = len(raw_json)

    print(f"Original JSON: {original_size} chars")
    print(f"Simplified TXT: {len(output)} chars")
    print(f"Reduction: {100 - (len(output) / original_size * 100):.1f}%")
    print(f"Output saved to: {output_file}")
    print("\nFirst 2000 chars preview:\n")
    # Round-trip through the console's own encoding so unprintable characters
    # become "?" instead of raising UnicodeEncodeError.
    console_encoding = getattr(sys.stdout, "encoding", None) or "utf-8"
    print(output[:2000].encode(console_encoding, errors='replace').decode(console_encoding))


if __name__ == "__main__":
    main()