Add Discord JSON to text converter script
Converts Discord chat exports (JSON) to a simplified, human-readable text format with the following features:
- Handles both list and dict JSON export formats
- Sorts messages chronologically
- Groups consecutive messages from the same user
- Includes attachments, embeds, and stickers as annotations
- Adds date headers when the day changes
- Shows file size reduction stats after conversion

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
89
convert_discord.py
Normal file
89
convert_discord.py
Normal file
@@ -0,0 +1,89 @@
|
||||
import argparse
import json
import os
import re
import sys
from datetime import datetime, timezone
|
||||
|
||||
# Matches the fractional-seconds part of an ISO-8601 timestamp.
_FRACTION_RE = re.compile(r"\.(\d+)")

# (message field, annotation label, candidate name keys, fallback name)
_ANNOTATION_KINDS = (
    ("attachments", "Attachment", ("fileName", "filename"), "file"),
    ("embeds", "Embed", ("title",), "embed"),
    ("stickers", "Sticker", ("name",), "sticker"),
)


def derive_output_path(json_path):
    """Return the output text path for *json_path*.

    The final extension (if any) is replaced by ``_chat.txt``.  Only the
    last path component is inspected for an extension, so a dot inside a
    directory name never truncates the path.
    """
    return os.path.splitext(json_path)[0] + '_chat.txt'


def _parse_timestamp(raw, tz=None):
    """Parse an ISO-8601 timestamp string and convert it to *tz*.

    ``tz=None`` converts to the system local timezone.  The string is
    normalised first so that it also parses on Python versions older than
    3.11, whose ``datetime.fromisoformat`` rejects a trailing ``Z`` and
    fractional seconds that are not exactly 3 or 6 digits long.
    """
    text = raw.strip()
    if text.endswith(("Z", "z")):
        text = text[:-1] + "+00:00"
    # Pad (or truncate) the fraction to exactly six digits.
    text = _FRACTION_RE.sub(
        lambda m: "." + m.group(1)[:6].ljust(6, "0"), text, count=1
    )
    return datetime.fromisoformat(text).astimezone(tz)


def _render_body(msg):
    """Return the message text followed by its bracketed annotations.

    Returns an empty string when the message has no text, attachments,
    embeds or stickers.  Missing, null or empty names fall back to a
    generic placeholder instead of rendering as ``None`` or nothing.
    """
    parts = []
    text = (msg.get("content") or "").strip()
    if text:
        parts.append(text)
    for field, label, name_keys, fallback in _ANNOTATION_KINDS:
        for item in msg.get(field) or ():
            name = next((item[k] for k in name_keys if item.get(k)), fallback)
            parts.append(f"[{label}: {name}]")
    return " ".join(parts)


def _author_name(msg):
    """Return the best available display name for the message author."""
    author = msg.get("author") or {}
    return (
        author.get("name")
        or author.get("global_name")
        or author.get("username")
        or "Unknown"
    )


def _header_lines(export):
    """Return the header lines for a dict-style export.

    The ``[Date: ...]`` line is emitted only when ``dateRange.after`` holds
    a value; a null or missing value is skipped rather than sliced.
    """
    header = [
        f"[Server: {export['guild']['name']} | Channel: #{export['channel']['name']}]"
    ]
    after = (export.get("dateRange") or {}).get("after")
    if after:
        header.append(f"[Date: {after[:10]}]")
    header.append("")
    return header


def convert_export(data, tz=None):
    """Convert parsed Discord export data into a list of output lines.

    Args:
        data: Either a list of message dicts, or a dict with ``guild``,
            ``channel``, ``dateRange`` and ``messages`` keys.
        tz: Timezone used for the displayed dates and times.  ``None``
            (the default) uses the system local timezone.

    Returns:
        The lines of the simplified transcript, without trailing newlines.

    Raises:
        KeyError: If a non-empty message has no ``timestamp``, or a
            dict-style export lacks ``guild``/``channel``/``messages``.
        ValueError: If a timestamp is not valid ISO-8601.
    """
    lines = []

    # Handle both formats: list of messages or dict with guild/channel/messages
    if isinstance(data, list):
        messages = data
    else:
        lines.extend(_header_lines(data))
        messages = data["messages"]

    # Drop empty messages, then order by the parsed instant rather than the
    # raw string: string order is wrong across differing UTC offsets.
    entries = []
    for msg in messages:
        body = _render_body(msg)
        if body:
            entries.append((_parse_timestamp(msg["timestamp"], tz), msg, body))
    entries.sort(key=lambda entry: entry[0])

    last_user = None
    last_date = None

    for ts, msg, body in entries:
        user = _author_name(msg)
        date_str = ts.strftime("%Y-%m-%d")

        # Add date header if day changed
        if date_str != last_date:
            if last_date is not None:
                lines.append("")
            lines.append(f"--- {date_str} ---")
            last_date = date_str
            # Start a fresh group so the first line under a date header
            # always carries its time and author.
            last_user = None

        # Compact consecutive messages from same user
        if user == last_user:
            lines.append(f" {body}")
        else:
            lines.append(f"[{ts.strftime('%H:%M')}] {user}: {body}")
            last_user = user

    return lines


def main(argv=None):
    """Command-line entry point: convert the given JSON export to text.

    Writes ``<input stem>_chat.txt`` next to the input file, then prints
    size statistics and a preview of the first 2000 characters.

    Args:
        argv: Argument list to parse; ``None`` uses ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description='Convert Discord JSON export to simplified text format')
    parser.add_argument('json_file', help='Path to the Discord JSON export file')
    args = parser.parse_args(argv)

    # Read once and keep the text so the size statistic needs no second
    # read.  'utf-8-sig' also accepts files that start with a BOM.
    with open(args.json_file, 'r', encoding='utf-8-sig') as f:
        raw = f.read()
    data = json.loads(raw)

    # Derive output filename from input
    output_file = derive_output_path(args.json_file)

    output = "\n".join(convert_export(data))

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(output)

    original_size = len(raw)

    print(f"Original JSON: {original_size} chars")
    print(f"Simplified TXT: {len(output)} chars")
    print(f"Reduction: {100 - (len(output) / original_size * 100):.1f}%")
    print(f"Output saved to: {output_file}")
    print("\nFirst 2000 chars preview:\n")
    # Replace only characters the actual console encoding cannot show,
    # instead of always squeezing the preview through cp1252.
    console_encoding = sys.stdout.encoding or 'utf-8'
    print(output[:2000].encode(console_encoding, errors='replace').decode(console_encoding))


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user