Files
WeChatMsg_fix/generate_report_data.py
T
2025-12-24 22:52:35 +08:00

386 lines
17 KiB
Python

import collections
import contextlib
import datetime
import html
import json
import os
import re
import sqlite3
import ssl
import sys
import time
import urllib.request
from collections import Counter, defaultdict

import jieba

# Add project root to path
project_root = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, project_root)

from wxManager import DatabaseConnection, MessageType
from wxManager.model import Me
# Tokens that must never be reported as keywords: filler words, media
# placeholders, URL fragments and emoticon names. The original literal repeated
# the empty string many times; one copy is kept so the set is unchanged (it can
# never match anyway, because tokens shorter than two characters are rejected
# before the stop-word lookup).
_STOP_WORDS = frozenset({
    '',
    '一个', '这个', '什么', '怎么', '可以', '知道', '现在', '今天', '就是', '还是',
    '没有', '不是', '但是', '因为', '所以', '如果', '那个', '觉得', '其实', '应该',
    '可能', '然后', '时候', '感觉', '一下', '一点', '真的', '已经', '只是', '出来',
    '起来', '看着', '看到', '自己', '我们', '你们', '他们', '图片', '表情', '收到',
    '链接', '视频', '语音', 'https', 'http', 'com', 'cn', 'www', '美团', '红包',
    'net', 'org', 'html', 'htm',
    '或者', '还有', '微信', '没事', '直接', '明天', '数据', '消息', '东西', '里面',
    '问号', '朋友', '人家', '之前', '哪个', '开始', '问题', '感情', '晚上', '意思',
    '学校', '手机', '不能', '一会', '这种', '宝宝', '不行', '我草', '谢谢', '多少',
    '不到', '的话', '别人', '我要', '左右', '组家',
    '警这种关系左石东西号1号0号0老狐函我草HШ李睢', '_别乐w月', '或者192',
    '捂脸', '笑哭', '呲牙', '偷笑', '调皮', '阴险', '骷髅', '敲打', '再见', '擦汗',
    '抠鼻', '鼓掌', '糗大了', '坏笑', '左哼哼', '右哼哼', '哈欠', '鄙视', '委屈',
    '快哭了', '亲亲', '可怜', '菜刀', '西瓜', '啤酒', '篮球', '乒乓', '咖啡',
    '猪头', '玫瑰', '凋谢', '示爱', '爱心', '心碎', '蛋糕', '闪电', '炸弹', '足球',
    '瓢虫', '便便', '月亮', '太阳', '礼物', '拥抱', '握手', '胜利', '抱拳', '勾引',
    '拳头', '差劲', '爱你', 'NO', 'OK', '爱情', '飞吻', '跳跳', '发抖', '怄火',
    '转圈', '磕头', '回头', '跳绳', '挥手', '激动', '街舞', '献吻', '左太极', '右太极',
})

# A token qualifies as "Chinese" when it contains at least one character in
# this range.
_CJK_CHAR_RE = re.compile(r'[\u4e00-\u9fa5]')

# Session names excluded from the report: chatrooms, official accounts (gh_),
# filehelper, openim / Enterprise WeChat, and one specific ID.
_NON_PRIVATE_SUFFIXES = ('@chatroom', '@openim', '@qy_u')
_EXCLUDED_USERNAMES = frozenset({'filehelper', 'jQ4jTweaBCAFtdK'})

# Numeric message type the original code compares against for emoji messages.
_EMOJI_MSG_TYPE = 47


def _is_private_chat(username):
    """Return True if *username* passes the strict private-chat filter."""
    return not (
        username.endswith(_NON_PRIVATE_SUFFIXES)
        or username.startswith('gh_')
        or username in _EXCLUDED_USERNAMES
    )


def _is_keyword(word):
    """Return True if a tokenizer output *word* may be counted as a keyword.

    A keyword is longer than one character, is not a stop word, contains no
    digit, and either contains a Chinese character or is a purely alphabetic
    word longer than two characters (this drops symbol garbage).
    """
    if len(word) <= 1 or word in _STOP_WORDS:
        return False
    # Any digit disqualifies the token; this also covers all-digit tokens.
    if any(ch.isdigit() for ch in word):
        return False
    return bool(_CJK_CHAR_RE.search(word)) or (word.isalpha() and len(word) > 2)


def _js_string(text, quote='"'):
    """Return *text* as an escaped JavaScript string literal.

    Args:
        text: Value to embed; converted with ``str()``.
        quote: ``'"'`` (default) or ``"'"`` - the delimiter of the literal.

    Quotes, backslashes and control characters are escaped so that arbitrary
    nicknames cannot break the generated JS file.
    """
    literal = json.dumps(str(text), ensure_ascii=False)
    if quote == '"':
        return literal
    # Re-delimit with single quotes: drop the escaping JSON added to double
    # quotes and escape single quotes instead.
    inner = literal[1:-1].replace('\\"', '"').replace("'", "\\'")
    return f"'{inner}'"


def _replace_literal(pattern, replacement, text):
    """Replace every match of *pattern* in *text* with *replacement* verbatim.

    ``re.sub`` treats a string replacement as a template: ``\\\\`` collapses to
    one backslash, ``\\n`` becomes a newline, and sequences such as ``\\d`` or
    ``\\1`` raise ``re.error``. Passing a callable disables that processing,
    which matters because the replacement embeds user-controlled names.
    """
    return re.sub(pattern, lambda _match: replacement, text)


def _lookup_emoji_url(db_dir, md5):
    """Look up a download URL for emoji *md5* in the local emoticon database.

    Returns ``cdn_url`` (or ``thumb_url`` when the former is empty) from
    ``kNonStoreEmoticonTable``, or ``None`` when the database file or the row
    does not exist.
    """
    emoticon_db_path = os.path.join(db_dir, 'emoticon', 'emoticon.db')
    if not os.path.exists(emoticon_db_path):
        return None
    # closing() guarantees the connection is released even if the query fails.
    with contextlib.closing(sqlite3.connect(emoticon_db_path)) as conn_emo:
        row = conn_emo.execute(
            "select cdn_url, thumb_url from kNonStoreEmoticonTable where md5=?",
            (md5,),
        ).fetchone()
    if not row:
        return None
    return row[0] or row[1]


def _download(url, dest_path, timeout=30):
    """Download *url* to *dest_path*.

    Raises:
        ValueError: If *url* is not an http(s) URL.
        OSError: On network or file-system failure (includes ``URLError``).
    """
    # urlopen() also understands file: and other schemes; the URL originates
    # from message data, so only plain web URLs are accepted.
    if not url.lower().startswith(('http://', 'https://')):
        raise ValueError(f"unsupported URL scheme: {url}")
    # NOTE(security): certificate verification is disabled, as in the original
    # code. Switch to ssl.create_default_context() if the CDN certificates
    # validate in your environment.
    ssl_context = ssl._create_unverified_context()
    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    with urllib.request.urlopen(req, context=ssl_context, timeout=timeout) as response:
        payload = response.read()
    # Write only after the body was read completely, so a failed transfer
    # does not leave a truncated image behind.
    with open(dest_path, 'wb') as out_file:
        out_file.write(payload)


def generate_report_data():
    """Compute 2025 private-chat statistics and inject them into getdata.js.

    Steps:
      1. Open the WeChat database and load the current user's info.
      2. Walk every private-chat session and aggregate the messages whose
         timestamp falls in 2025 (sent/received counts, per-hour and per-day
         activity, per-friend totals, sent text for keywords, sent emoji).
      3. Derive top friends, the top friend of each month, keywords, the
         heatmap series and the most used emoji (downloaded best-effort).
      4. Rewrite the matching ``export`` sections of the front-end data file
         in place.

    Side effects: writes avatar/emoji images into the report's ``public/header``
    directory, overwrites ``getdata.js`` and prints progress to stdout.
    The database and report locations are hard-coded below.
    """
    print("开始生成个性化年度报告数据...")

    # 1. Setup DB
    db_dir = r'e:\WeChatMsg\wxid_g4pshorcc0r529\db_storage'
    db_version = 4
    conn = DatabaseConnection(db_dir, db_version)
    db = conn.get_interface()

    # Load Self Info
    Me().load_from_json(os.path.join(db_dir, 'info.json'))
    self_wxid = Me().wxid
    self_name = Me().name
    print(f"当前用户: {self_name} ({self_wxid})")

    # 2. Setup Paths
    report_root = r"e:\WeChatMsg\AnnualReport\report-2025\single"
    js_file = os.path.join(report_root, "src", "js", "getdata.js")
    avatar_dir = os.path.join(report_root, "public", "header")
    os.makedirs(avatar_dir, exist_ok=True)

    # 3. Helper: Save Avatar
    avatar_cache = {}  # wxid -> JS literal, so each avatar is exported once

    def get_avatar_path(wxid):
        """Export the avatar of *wxid* and return its path as a JS literal.

        Falls back to the bundled default image when the contact has no
        avatar or exporting fails; failures are reported, not raised.
        """
        if wxid in avatar_cache:
            return avatar_cache[wxid]
        src = "'./header/header12.webp'"  # Default
        try:
            buf = db.get_avatar_buffer(wxid)
            if buf:
                filename = f"{wxid}.jpg"
                with open(os.path.join(avatar_dir, filename), 'wb') as f:
                    f.write(buf)
                src = _js_string(f"./header/{filename}", quote="'")
        except Exception as e:
            # Best effort: a missing avatar must not abort the whole report.
            print(f"获取头像失败 ({wxid}): {e}")
        avatar_cache[wxid] = src
        return src

    # Save Self Avatar
    self_avatar_src = get_avatar_path(self_wxid)

    # 4. Helper: Get Name
    contact_cache = {c.wxid: c for c in db.get_contacts()}

    def get_name(wxid):
        """Return the display name: remark, else nickname, else the wxid."""
        contact = contact_cache.get(wxid)
        return (
            getattr(contact, 'remark', None)
            or getattr(contact, 'nickname', None)
            or wxid
        )

    # 5. Analyze Messages
    print("正在分析消息记录 (仅统计私聊)...")
    total_sent = 0
    total_received = 0
    total_words = 0
    hour_counts = [0] * 24
    daily_msg_counts = defaultdict(int)  # '2025-01-01' -> count
    friend_msg_counts = Counter()
    friend_word_counts = Counter()
    friend_monthly_counts = defaultdict(lambda: defaultdict(int))  # '1' -> {wxid: count}
    emoji_counter = Counter()
    emoji_urls = {}
    text_content = []

    # Date range for 2025 (local time, half-open interval)
    start_2025 = datetime.datetime(2025, 1, 1).timestamp()
    end_2025 = datetime.datetime(2026, 1, 1).timestamp()

    sessions = db.session_db.get_session()
    session_users = [s[0] for s in sessions]
    for processed_count, username in enumerate(session_users, start=1):
        if processed_count % 50 == 0:
            print(f"已处理 {processed_count}/{len(session_users)} 个会话...")
        # STRICT FILTER: Only private chats
        if not _is_private_chat(username):
            continue
        msgs = db.get_messages(username)
        if not msgs:
            continue
        for msg in msgs:
            ts = msg.timestamp
            # Only 2025 messages feed the report. Checking the range first
            # also rejects non-positive timestamps and avoids converting
            # values that are never used.
            if not (start_2025 <= ts < end_2025):
                continue
            dt = datetime.datetime.fromtimestamp(ts)
            daily_msg_counts[dt.strftime('%Y-%m-%d')] += 1
            hour_counts[dt.hour] += 1
            if msg.is_sender:
                total_sent += 1
            else:
                total_received += 1

            if msg.type == MessageType.Text and msg.content:
                length = len(msg.content)
                total_words += length
                friend_msg_counts[username] += 1
                friend_word_counts[username] += length
                friend_monthly_counts[f"{dt.month}"][username] += 1
                # Keywords source - ONLY FROM SENDER (ME); capped for memory
                if msg.is_sender and len(text_content) < 50000:
                    text_content.append(msg.content)
            elif msg.type == _EMOJI_MSG_TYPE and msg.is_sender:
                md5 = getattr(msg, 'md5', None)
                if md5:
                    emoji_counter[md5] += 1
                    url = getattr(msg, 'url', None)
                    if url:
                        emoji_urls[md5] = url

    # 6. Process Data
    print("正在计算统计数据...")
    # Days in 2025 so far: up to today while still in 2025, the full year after.
    now = datetime.datetime.now()
    if now.year == 2025:
        days_in_year = (now - datetime.datetime(2025, 1, 1)).days + 1
    elif now.year > 2025:
        days_in_year = 365
    else:
        days_in_year = 1  # Should not happen based on context

    # Top Friends
    chat_friends_data = [
        {
            'name': get_name(wxid),
            'messageCount': f"{count}条消息",
            'wordCount': f"{friend_word_counts[wxid]}",
            'avatarSrc': get_avatar_path(wxid),
        }
        for wxid, count in friend_msg_counts.most_common(5)
    ]

    # Monthly Top Friends (months without any text message are skipped)
    month_friends_data = []
    for month in range(1, 13):
        m_key = f"{month}"
        per_friend = friend_monthly_counts.get(m_key)
        if not per_friend:
            continue
        wxid, num = max(per_friend.items(), key=lambda item: item[1])
        month_friends_data.append({
            'month': m_key,
            'nickname': get_name(wxid),
            'className': 'passion',
            'num': num,
            'avatar': get_avatar_path(wxid),
        })

    # Keywords: each word counts at most once per message.
    print("正在生成关键词...")
    word_counter = Counter()
    for msg_text in text_content:
        word_counter.update({w for w in jieba.cut(msg_text) if _is_keyword(w)})
    keywords_list = [[w, c] for w, c in word_counter.most_common(50)]
    top_keyword, top_keyword_num = keywords_list[0] if keywords_list else ("", 0)

    # Heatmap Data (Step Data Replacement)
    # Format: [['2025-01-01', 10], ...], emitted in date order.
    heatmap_data_js = (
        "[\n"
        + "".join(
            f" ['{date_str}', {count}],\n"
            for date_str, count in sorted(daily_msg_counts.items())
        )
        + " ]"
    )

    # Busiest day; the placeholder date is kept only when there is no data.
    peak_year, peak_month, peak_day, peak_count = '2025', '12', '31', 0
    if daily_msg_counts:
        peak_date, peak_count = max(daily_msg_counts.items(), key=lambda item: item[1])
        year_part, month_part, day_part = peak_date.split('-')
        # int() drops the zero padding, matching the unpadded placeholder style.
        peak_year, peak_month, peak_day = year_part, str(int(month_part)), str(int(day_part))

    # Top Emoji
    top_emoji_src = "'./header/header48.webp'"  # Default
    if emoji_counter:
        top_md5, top_count = emoji_counter.most_common(1)[0]
        print(f"最常用表情包 MD5: {top_md5} (使用 {top_count} 次)")
        try:
            cdn_url = emoji_urls.get(top_md5) or _lookup_emoji_url(db_dir, top_md5)
            if cdn_url:
                cdn_url = html.unescape(cdn_url)
                print(f"下载表情包: {cdn_url}")
                emoji_filename = f"emoji_{top_md5}.jpg"
                _download(cdn_url, os.path.join(avatar_dir, emoji_filename))
                top_emoji_src = _js_string(f"./header/{emoji_filename}", quote="'")
        except Exception as e:
            # Best effort: keep the default image when lookup/download fails.
            print(f"获取表情包失败: {e}")

    # 7. Update File
    print(f"正在更新前端文件: {js_file}")
    with open(js_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Replace Welcome_data
    welcome_js = "\n".join([
        "export const Welcome_data = reactive({",
        f"avatarSrc: {self_avatar_src},",
        f"nickname: {_js_string(self_name)},",
        "descriptionText: {",
        'hello: "Hello World!",',
        'text1: "时光荏苒,转眼间我们又走过了一年。",',
        'text2: "在过去的365天里,从深夜的长谈到清晨的祝福,从好友间的调侃到工作中的忙碌……",',
        'text4: "这些聊天记录,是属于你的独家记忆。",',
        'text7: "打开报告,开启你的专属年度记忆吧!"',
        "}",
        "});",
    ])
    content = _replace_literal(
        r'export const Welcome_data = reactive\(\{[\s\S]*?\}\);', welcome_js, content)

    # Replace statsData
    stats_js = (
        "export var statsData = {\n"
        f" daysInWeChat: {days_in_year},\n"
        f" numOfFriends: {len(friend_msg_counts)},\n"
        f" messagesSent: {total_sent},\n"
        f" messagesReceived: {total_received},\n"
        f" totalWords: {total_words}\n"
        "};"
    )
    content = _replace_literal(r'export var statsData = \{[^}]+\};', stats_js, content)

    # Replace yAxisData (Hours); a list of ints prints as a valid JS array.
    content = _replace_literal(
        r'export var yAxisData = \[[^\]]+\];',
        f'export var yAxisData = {hour_counts};',
        content)

    # Replace chatFriendsData
    friends_js = (
        "export const chatFriendsData = reactive({\n chatFriends:[\n"
        + "".join(
            " {\n"
            f" name: {_js_string(friend['name'])},\n"
            f" messageCount: {_js_string(friend['messageCount'])},\n"
            f" wordCount: {_js_string(friend['wordCount'])},\n"
            f" avatarSrc: {friend['avatarSrc']}\n"
            " },\n"
            for friend in chat_friends_data
        )
        + " ]\n});"
    )
    content = _replace_literal(
        r'export const chatFriendsData = reactive\(\{[\s\S]*?\}\);', friends_js, content)

    # Replace monthFriendsData
    month_js = (
        "export const monthFriendsData = reactive(\n {\n month_data:[\n"
        + "".join(
            f" {{ month: {_js_string(entry['month'])}, "
            f"nickname: {_js_string(entry['nickname'])}, "
            f"className: {_js_string(entry['className'])}, "
            f"num: {entry['num']}, avatar: {entry['avatar']} }},\n"
            for entry in month_friends_data
        )
        + " ]\n }\n)"
    )
    content = _replace_literal(
        r'export const monthFriendsData = reactive\(\s*\{[\s\S]*?\}\s*\)', month_js, content)

    # Replace keywordsData
    keywords_js = (
        "export const keywordsData = reactive({\n"
        f" keyword: {_js_string(top_keyword)},\n"
        f" keyword_num: {top_keyword_num},\n"
        " messages : [],\n"  # Clear dummy messages
        " chart_option:{},\n"
        " word_counter:[\n"
        + "".join(f" [{_js_string(w)},{c}],\n" for w, c in keywords_list)
        + " ]\n});"
    )
    content = _replace_literal(
        r'export const keywordsData = reactive\(\{[\s\S]*?\}\);', keywords_js, content)

    # Replace stepData (Heatmap)
    # It might be "getVirtualData('2024')" in the original file
    content = _replace_literal(
        r'export var\s+stepData\s+=\s+[^;]+;',
        f'export var stepData = {heatmap_data_js};',
        content)

    # Update stepdescription to "Message Activity"
    step_desc_js = "\n".join([
        "export const stepdescription = {",
        "sumUp: '热络的每一天',",
        "left: {",
        "totalStepsPrefix: '年度活跃天数',",
        f"totalSteps: {len(daily_msg_counts)},",
        "distancePrefix: '累计互动',",
        f"distance: {total_sent + total_received},",
        "distanceSuffix: '',",
        "earthPrefix: '超过了',",
        "earthRounds: 99,",
        "earthSuffix: '%的用户',",
        "},",
        "right: {",
        f"year: '{peak_year}',",
        f"month: '{peak_month}',",
        f"day: '{peak_day}',",
        "stepsPrefix: '单日最高',",
        f"steps: {peak_count},",
        "stepsSuffix: '',",
        "message: '这一天,你们的对话仿佛没有尽头',",
        "},",
        "};",
    ])
    content = _replace_literal(
        r'export const stepdescription = \{[\s\S]*?\};', step_desc_js, content)

    # Update Summary Card (wechatReportData)
    summary_friends_js = (
        "friends : [\n"
        + "".join(
            f" {{ name: {_js_string(friend['name'], quote=chr(39))}, "
            f"avatarSrc: {friend['avatarSrc']} }},\n"
            for friend in chat_friends_data
        )
        + " ],"
    )
    content = _replace_literal(r'friends : \[[\s\S]*?\],', summary_friends_js, content)

    numeric_cards = (
        ('聊天联系人', len(friend_msg_counts)),
        ('发送消息', total_sent),
        ('收到消息', total_received),
        ('发送总字数', total_words),
    )
    for label, value in numeric_cards:
        content = _replace_literal(
            r"\{ label: '" + label + r"', value: \d+, unit: '' \}",
            f"{{ label: '{label}', value: {value}, unit: '' }}",
            content)
    # '[^']*' (not '+') so that a previously written empty keyword can still
    # be replaced on the next run.
    content = _replace_literal(
        r"\{ label: '年度关键词', value: '[^']*' \}",
        f"{{ label: '年度关键词', value: {_js_string(top_keyword, quote=chr(39))} }}",
        content)
    content = _replace_literal(
        r"\{ label: '常用表情包', image: '[^']+' \}",
        f"{{ label: '常用表情包', image: {top_emoji_src} }}",
        content)

    with open(js_file, 'w', encoding='utf-8') as f:
        f.write(content)
    print("生成完成!请刷新网页查看。")
# Script entry point: generate the report data only when this file is run
# directly, not when it is imported as a module.
if __name__ == "__main__":
    generate_report_data()