Converts Discord chat exports (JSON) to a simplified, human-readable text format with the following features:

- Handles both list and dict JSON export formats
- Sorts messages chronologically
- Groups consecutive messages from the same user
- Includes attachments, embeds, and stickers as annotations
- Adds date headers when the day changes
- Shows file size reduction stats after conversion

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
90 lines
2.9 KiB
Python
90 lines
2.9 KiB
Python
import argparse
import json
import sys
from datetime import datetime, timezone
from pathlib import Path
def _parse_timestamp(raw):
    """Parse an ISO 8601 timestamp string into an aware, local-time datetime.

    A trailing "Z" is rewritten to "+00:00" because
    ``datetime.fromisoformat`` only accepts it from Python 3.11 onwards.
    Naive timestamps are interpreted as local time by ``astimezone()``.
    """
    if raw.endswith("Z"):
        raw = raw[:-1] + "+00:00"
    return datetime.fromisoformat(raw).astimezone()


def _annotations(msg):
    """Yield one bracketed annotation per attachment, embed and sticker.

    Names that are missing, null or empty fall back to a generic label so
    the output never contains "None".
    """
    for att in msg.get("attachments") or ():
        att_name = att.get("fileName") or att.get("filename") or "file"
        yield f"[Attachment: {att_name}]"
    for emb in msg.get("embeds") or ():
        yield f"[Embed: {emb.get('title') or 'embed'}]"
    for stk in msg.get("stickers") or ():
        yield f"[Sticker: {stk.get('name') or 'sticker'}]"


def format_chat(data):
    """Render a parsed Discord export as a list of output lines.

    Args:
        data: Either a list of message dicts, or a dict with "guild",
            "channel" and "messages" keys (and optionally "dateRange").

    Returns:
        The text lines of the simplified chat log, without newlines.

    Raises:
        KeyError: If a dict export lacks "guild", "channel" or "messages",
            or a message lacks "timestamp".
        ValueError: If a timestamp is not valid ISO 8601.
    """
    lines = []

    # Handle both formats: list of messages or dict with guild/channel/messages
    if isinstance(data, list):
        messages = data
    else:
        lines.append(f"[Server: {data['guild']['name']} | Channel: #{data['channel']['name']}]")
        # NOTE(review): "after" appears to be null when the export was made
        # without a date filter -- confirm against the exporter. Skip the
        # line instead of slicing None.
        after = (data.get("dateRange") or {}).get("after")
        if after:
            lines.append(f"[Date: {after[:10]}]")
        lines.append("")
        messages = data["messages"]

    # Sort on the parsed instant, not the raw string: string order is wrong
    # when messages carry different UTC offsets. sorted() is stable, so
    # equal timestamps keep their export order.
    stamped = sorted(
        ((_parse_timestamp(msg["timestamp"]), msg) for msg in messages),
        key=lambda pair: pair[0],
    )

    last_user = None
    last_date = None

    for ts, msg in stamped:
        text = (msg.get("content") or "").strip()
        content = " ".join(filter(None, [text, *_annotations(msg)]))
        if not content:
            continue

        author = msg.get("author") or {}
        user = (
            author.get("name")
            or author.get("global_name")
            or author.get("username")
            or "Unknown"
        )
        date_str = ts.strftime("%Y-%m-%d")

        # Add date header if day changed
        if date_str != last_date:
            if last_date:
                lines.append("")
            lines.append(f"--- {date_str} ---")
            last_date = date_str
            # Restart grouping so the first message under a new date header
            # always shows its time and author.
            last_user = None

        # Compact consecutive messages from same user
        if user == last_user:
            lines.append(f" {content}")
        else:
            lines.append(f"[{ts.strftime('%H:%M')}] {user}: {content}")
            last_user = user

    return lines


def main(argv=None):
    """Convert the export named on the command line and print size stats.

    Writes "<input stem>_chat.txt" next to the input file, then prints the
    character counts, the reduction percentage and a 2000-character preview.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description='Convert Discord JSON export to simplified text format')
    parser.add_argument('json_file', help='Path to the Discord JSON export file')
    args = parser.parse_args(argv)

    source = Path(args.json_file)
    # Read once: the same text feeds the JSON parser and the size statistic.
    raw = source.read_text(encoding='utf-8')
    data = json.loads(raw)

    # Derive output filename from input. Using the path stem avoids
    # splitting on a dot that belongs to a directory name.
    output_file = source.with_name(source.stem + '_chat.txt')

    output = "\n".join(format_chat(data))
    output_file.write_text(output, encoding='utf-8')

    original_size = len(raw)
    print(f"Original JSON: {original_size} chars")
    print(f"Simplified TXT: {len(output)} chars")
    print(f"Reduction: {100 - (len(output) / original_size * 100):.1f}%")
    print(f"Output saved to: {output_file}")
    print("\nFirst 2000 chars preview:\n")

    # Round-trip through the console's own encoding so unencodable
    # characters become "?" instead of raising, without forcing cp1252 on
    # terminals that can display more.
    console_encoding = sys.stdout.encoding or 'utf-8'
    print(output[:2000].encode(console_encoding, errors='replace').decode(console_encoding))


if __name__ == "__main__":
    main()