# Mirror of https://github.com/MoonCakeMC/WeChatMsg_fix.git
# (synced 2026-05-28 01:57:14 +08:00; 386 lines, 17 KiB, Python)
import os
|
|
import sys
|
|
import time
|
|
import re
|
|
import jieba
|
|
import collections
|
|
from collections import Counter, defaultdict
|
|
import datetime
|
|
import urllib.request
|
|
import ssl
|
|
import html
|
|
|
|
# Add project root to path
|
|
project_root = os.path.dirname(os.path.abspath(__file__))
|
|
sys.path.insert(0, project_root)
|
|
|
|
from wxManager import DatabaseConnection, MessageType
|
|
from wxManager.model import Me
|
|
|
|
# Words that never count as keywords: function words, media placeholders,
# URL fragments, built-in emoticon names and a few observed garbage tokens.
_REPORT_STOP_WORDS = frozenset({
    '的', '了', '我', '是', '你', '在', '也', '就', '不', '有', '啊', '吧', '吗', '呢', '哈',
    '去', '都', '那', '一个', '这个', '什么', '怎么', '可以', '知道', '现在', '今天', '就是',
    '还是', '没有', '不是', '但是', '因为', '所以', '如果', '那个', '觉得', '其实', '应该',
    '可能', '然后', '时候', '感觉', '一下', '一点', '真的', '已经', '只是', '出来', '起来',
    '看着', '看到', '自己', '我们', '你们', '他们', '图片', '表情', '收到', '链接', '视频',
    '语音', 'https', 'http', 'com', 'cn', 'www', '美团', '红包', 'net', 'org', 'html', 'htm',
    '或者', '还有', '微信', '没事', '直接', '明天', '数据', '消息', '东西', '里面', '问号',
    '朋友', '人家', '之前', '哪个', '开始', '问题', '感情', '晚上', '意思', '学校', '手机',
    '不能', '一会', '这种', '宝宝', '不行', '我草', '谢谢', '多少', '不到', '的话', '别人',
    '我要', '左右', '组家', '警这种关系左石东西号1号0号0老狐函我草HШ李睢', '_别乐w月',
    '或者192', '捂脸', '笑哭', '呲牙', '偷笑', '调皮', '阴险', '晕', '衰', '骷髅', '敲打',
    '再见', '擦汗', '抠鼻', '鼓掌', '糗大了', '坏笑', '左哼哼', '右哼哼', '哈欠', '鄙视',
    '委屈', '快哭了', '亲亲', '吓', '可怜', '菜刀', '西瓜', '啤酒', '篮球', '乒乓', '咖啡',
    '饭', '猪头', '玫瑰', '凋谢', '示爱', '爱心', '心碎', '蛋糕', '闪电', '炸弹', '刀',
    '足球', '瓢虫', '便便', '月亮', '太阳', '礼物', '拥抱', '强', '弱', '握手', '胜利',
    '抱拳', '勾引', '拳头', '差劲', '爱你', 'NO', 'OK', '爱情', '飞吻', '跳跳', '发抖',
    '怄火', '转圈', '磕头', '回头', '跳绳', '挥手', '激动', '街舞', '献吻', '左太极', '右太极',
})

# Numeric message type this script counts as an emoji/sticker message.
_EMOJI_MESSAGE_TYPE = 47

# Matches a token that contains at least one CJK unified ideograph.
_CJK_CHAR_RE = re.compile(r'[\u4e00-\u9fa5]')

# Characters that would terminate or corrupt a JavaScript string literal.
_JS_STRING_ESCAPES = str.maketrans({
    '\\': '\\\\',
    '"': '\\"',
    "'": "\\'",
    '\n': '\\n',
    '\r': '\\r',
    '\u2028': '\\u2028',
    '\u2029': '\\u2029',
})


def _js_quote(text, quote='"'):
    """Return *text* as an escaped JavaScript string literal wrapped in *quote*."""
    return f"{quote}{str(text).translate(_JS_STRING_ESCAPES)}{quote}"


def _replace_js(content, pattern, replacement, label):
    """Replace every match of *pattern* in *content* with the literal *replacement*.

    A callable is handed to ``re.subn`` so backslashes inside the generated
    JavaScript are inserted verbatim instead of being parsed as regex escapes.
    A warning is printed when nothing matched, because the frontend file would
    otherwise silently keep its old data.
    """
    updated, hits = re.subn(pattern, lambda _match: replacement, content)
    if not hits:
        print(f"警告: 未在前端文件中找到 {label},已跳过")
    return updated


def _extract_keywords(messages, limit=50):
    """Return the *limit* most frequent keywords as ``[word, count]`` pairs.

    Each word is counted at most once per message. A token is kept when it is
    longer than one character, is not a stop word, contains no digit, and
    either contains a Chinese character or is alphabetic with 3+ characters.
    """
    counter = Counter()
    for text in messages:
        kept = set()
        for word in jieba.cut(text):
            if len(word) <= 1 or word in _REPORT_STOP_WORDS:
                continue
            if any(ch.isdigit() for ch in word):
                continue
            if _CJK_CHAR_RE.search(word) or (word.isalpha() and len(word) > 2):
                kept.add(word)
        counter.update(kept)
    return [[word, count] for word, count in counter.most_common(limit)]


def _lookup_emoji_url(db_dir, md5):
    """Look up the download URL of emoji *md5* in the local emoticon database.

    Returns ``cdn_url`` (or ``thumb_url`` when that is empty) from
    ``kNonStoreEmoticonTable``, or ``None`` when the database file or the row
    does not exist.
    """
    import sqlite3

    emoticon_db_path = os.path.join(db_dir, 'emoticon', 'emoticon.db')
    if not os.path.exists(emoticon_db_path):
        return None
    conn_emo = sqlite3.connect(emoticon_db_path)
    try:
        cursor_emo = conn_emo.cursor()
        cursor_emo.execute(
            "select cdn_url, thumb_url from kNonStoreEmoticonTable where md5=?",
            (md5,),
        )
        row = cursor_emo.fetchone()
    finally:
        # Close the connection even when the query fails.
        conn_emo.close()
    if not row:
        return None
    return row[0] or row[1]


def generate_report_data(
    db_dir=r'e:\WeChatMsg\wxid_g4pshorcc0r529\db_storage',
    db_version=4,
    report_root=r"e:\WeChatMsg\AnnualReport\report-2025\single",
):
    """Compute 2025 private-chat statistics and write them into the report frontend.

    The function reads messages through ``DatabaseConnection``, aggregates
    counts for messages whose timestamp falls in 2025, saves avatars and the
    most used emoji under ``<report_root>/public/header`` and rewrites the
    matching ``export`` sections of ``<report_root>/src/js/getdata.js`` in place.

    Args:
        db_dir: Directory holding the database files and ``info.json``.
        db_version: Database version passed to ``DatabaseConnection``.
        report_root: Root directory of the report frontend project.

    Returns:
        None. Results are written to disk and progress is printed.

    Raises:
        OSError: If ``getdata.js`` cannot be read or written. Avatar and emoji
            failures are reported and replaced with default images instead.
    """
    print("开始生成个性化年度报告数据...")

    # 1. Setup DB
    conn = DatabaseConnection(db_dir, db_version)
    db = conn.get_interface()

    # Load Self Info
    Me().load_from_json(os.path.join(db_dir, 'info.json'))
    self_wxid = Me().wxid
    self_name = Me().name
    print(f"当前用户: {self_name} ({self_wxid})")

    # 2. Setup Paths
    js_file = os.path.join(report_root, "src", "js", "getdata.js")
    avatar_dir = os.path.join(report_root, "public", "header")
    os.makedirs(avatar_dir, exist_ok=True)

    # 3. Helper: Save Avatar
    avatar_cache = {}

    def get_avatar_path(wxid):
        """Save the avatar of *wxid* and return its JS source literal."""
        if wxid in avatar_cache:
            # The same contact shows up in several rankings; write the file once.
            return avatar_cache[wxid]
        src = "'./header/header12.webp'"  # Default
        try:
            buf = db.get_avatar_buffer(wxid)
            if buf:
                filename = f"{wxid}.jpg"
                with open(os.path.join(avatar_dir, filename), 'wb') as f:
                    f.write(buf)
                src = _js_quote(f"./header/{filename}", "'")
        except Exception as e:
            # Best effort: a missing avatar must not abort the whole report.
            print(f"获取头像失败 ({wxid}): {e}")
        avatar_cache[wxid] = src
        return src

    # Save Self Avatar
    self_avatar_src = get_avatar_path(self_wxid)

    # 4. Helper: Get Name
    contact_cache = {c.wxid: c for c in db.get_contacts()}

    def get_name(wxid):
        """Return remark, then nickname, then the raw wxid for a contact."""
        contact = contact_cache.get(wxid)
        return (
            getattr(contact, 'remark', None)
            or getattr(contact, 'nickname', None)
            or wxid
        )

    # 5. Analyze Messages
    print("正在分析消息记录 (仅统计私聊)...")

    total_sent = 0
    total_received = 0
    # NOTE(review): counts text length of sent AND received messages, although
    # the summary card labels it '发送总字数' — confirm which one is intended.
    total_words = 0

    hour_counts = [0] * 24
    daily_msg_counts = defaultdict(int)  # '2025-01-01' -> count

    friend_msg_counts = Counter()
    friend_word_counts = Counter()
    friend_monthly_counts = defaultdict(lambda: defaultdict(int))  # '1月' -> {wxid: count}

    emoji_counter = Counter()
    emoji_urls = {}

    text_content = []

    # Date range for 2025 (local time, matching fromtimestamp below)
    start_2025 = datetime.datetime(2025, 1, 1).timestamp()
    end_2025 = datetime.datetime(2026, 1, 1).timestamp()

    # STRICT FILTER: only private chats. Excludes chatrooms, official accounts
    # (gh_), filehelper, openim (Enterprise WeChat) and specific IDs.
    excluded_suffixes = ('@chatroom', '@openim', '@qy_u')
    excluded_ids = {'filehelper', 'jQ4jTweaBCAFtdK'}

    sessions = db.session_db.get_session()
    session_users = [s[0] for s in sessions]

    for processed_count, username in enumerate(session_users, start=1):
        if processed_count % 50 == 0:
            print(f"已处理 {processed_count}/{len(session_users)} 个会话...")

        if (
            username.endswith(excluded_suffixes)
            or username.startswith('gh_')
            or username in excluded_ids
        ):
            continue

        msgs = db.get_messages(username)
        if not msgs:
            continue

        for msg in msgs:
            ts = msg.timestamp
            # Only 2025 messages feed the report. Filtering before the
            # datetime conversion also skips non-positive or out-of-range
            # timestamps that fromtimestamp() could reject.
            if not (start_2025 <= ts < end_2025):
                continue

            dt = datetime.datetime.fromtimestamp(ts)
            daily_msg_counts[dt.strftime('%Y-%m-%d')] += 1
            hour_counts[dt.hour] += 1

            if msg.is_sender:
                total_sent += 1
            else:
                total_received += 1

            if msg.type == MessageType.Text and msg.content:
                text_len = len(msg.content)
                total_words += text_len
                friend_msg_counts[username] += 1
                friend_word_counts[username] += text_len
                friend_monthly_counts[f"{dt.month}月"][username] += 1

                # Keywords source - ONLY FROM SENDER (ME), capped for memory
                if msg.is_sender and len(text_content) < 50000:
                    text_content.append(msg.content)

            elif msg.type == _EMOJI_MESSAGE_TYPE and msg.is_sender:
                md5 = getattr(msg, 'md5', None)
                if md5:
                    emoji_counter[md5] += 1
                    url = getattr(msg, 'url', None)
                    if url:
                        emoji_urls[md5] = url

    # 6. Process Data
    print("正在计算统计数据...")

    # Days in 2025 so far: up to today while still in 2025, the full year after.
    now = datetime.datetime.now()
    if now.year == 2025:
        days_in_year = (now - datetime.datetime(2025, 1, 1)).days + 1
    elif now.year > 2025:
        days_in_year = 365
    else:
        days_in_year = 1  # Should not happen based on context

    # Top Friends
    chat_friends_data = [
        {
            'name': get_name(wxid),
            'messageCount': f"{count}条消息",
            'wordCount': f"{friend_word_counts[wxid]}字",
            'avatarSrc': get_avatar_path(wxid),
        }
        for wxid, count in friend_msg_counts.most_common(5)
    ]

    # Monthly Top Friends
    month_friends_data = []
    for month in range(1, 13):
        m_key = f"{month}月"
        per_friend = friend_monthly_counts.get(m_key)
        if not per_friend:
            continue
        wxid, num = max(per_friend.items(), key=lambda item: item[1])
        month_friends_data.append({
            'month': m_key,
            'nickname': get_name(wxid),
            'className': 'passion',
            'num': num,
            'avatar': get_avatar_path(wxid),
        })

    # Keywords
    print("正在生成关键词...")
    keywords_list = _extract_keywords(text_content)
    if keywords_list:
        top_keyword, top_keyword_num = keywords_list[0]
    else:
        top_keyword, top_keyword_num = "无", 0

    # Heatmap Data (Step Data Replacement)
    # Format: [['2025-01-01', 10], ...]
    heatmap_rows = "".join(
        f" ['{day}', {count}],\n" for day, count in daily_msg_counts.items()
    )
    heatmap_data_js = f"[\n{heatmap_rows} ]"

    # Busiest day: its date is shown next to the "单日最高" count.
    if daily_msg_counts:
        peak_date, peak_count = max(
            daily_msg_counts.items(), key=lambda item: item[1]
        )
        # NOTE(review): components are zero-padded ('01'); confirm the
        # frontend does not expect unpadded month/day strings.
        peak_year, peak_month, peak_day = peak_date.split('-')
    else:
        peak_count = 0
        peak_year, peak_month, peak_day = '2025', '12', '31'

    # Top Emoji
    top_emoji_src = "'./header/header48.webp'"  # Default
    if emoji_counter:
        top_md5, top_count = emoji_counter.most_common(1)[0]
        print(f"最常用表情包 MD5: {top_md5} (使用 {top_count} 次)")

        try:
            cdn_url = emoji_urls.get(top_md5) or _lookup_emoji_url(db_dir, top_md5)
            if cdn_url:
                cdn_url = html.unescape(cdn_url)
                print(f"下载表情包: {cdn_url}")
                emoji_filename = f"emoji_{top_md5}.jpg"
                emoji_path = os.path.join(avatar_dir, emoji_filename)

                # NOTE(review): certificate verification is disabled here, as
                # in the original script; the download is open to tampering.
                ssl_context = ssl._create_unverified_context()
                req = urllib.request.Request(
                    cdn_url, headers={'User-Agent': 'Mozilla/5.0'}
                )
                # Download fully before opening the target file so a failed
                # request leaves no empty image behind; the timeout keeps a
                # stalled server from hanging the script.
                with urllib.request.urlopen(
                    req, context=ssl_context, timeout=30
                ) as response:
                    payload = response.read()
                with open(emoji_path, 'wb') as out_file:
                    out_file.write(payload)

                top_emoji_src = _js_quote(f"./header/{emoji_filename}", "'")
        except Exception as e:
            print(f"获取表情包失败: {e}")

    # 7. Update File
    print(f"正在更新前端文件: {js_file}")

    with open(js_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Replace Welcome_data
    welcome_js = "\n".join([
        "export const Welcome_data = reactive({",
        f"avatarSrc: {self_avatar_src},",
        f"nickname: {_js_quote(self_name)},",
        "descriptionText: {",
        'hello: "Hello World!",',
        'text1: "时光荏苒,转眼间我们又走过了一年。",',
        'text2: "在过去的365天里,从深夜的长谈到清晨的祝福,从好友间的调侃到工作中的忙碌……",',
        'text4: "这些聊天记录,是属于你的独家记忆。",',
        'text7: "打开报告,开启你的专属年度记忆吧!"',
        "}",
        "});",
    ])
    content = _replace_js(
        content,
        r'export const Welcome_data = reactive\(\{[\s\S]*?\}\);',
        welcome_js,
        'Welcome_data',
    )

    # Replace statsData
    stats_js = (
        'export var statsData = {\n'
        f' daysInWeChat: {days_in_year},\n'
        f' numOfFriends: {len(friend_msg_counts)},\n'
        f' messagesSent: {total_sent},\n'
        f' messagesReceived: {total_received},\n'
        f' totalWords: {total_words}\n'
        '};'
    )
    content = _replace_js(
        content, r'export var statsData = \{[^}]+\};', stats_js, 'statsData'
    )

    # Replace yAxisData (Hours)
    content = _replace_js(
        content,
        r'export var yAxisData = \[[^\]]+\];',
        f'export var yAxisData = {hour_counts};',
        'yAxisData',
    )

    # Replace chatFriendsData
    friend_entries = "".join(
        " {\n"
        f" name: {_js_quote(friend['name'])},\n"
        f" messageCount: {_js_quote(friend['messageCount'])},\n"
        f" wordCount: {_js_quote(friend['wordCount'])},\n"
        f" avatarSrc: {friend['avatarSrc']}\n"
        " },\n"
        for friend in chat_friends_data
    )
    friends_js = (
        "export const chatFriendsData = reactive({\n chatFriends:[\n"
        + friend_entries
        + " ]\n});"
    )
    content = _replace_js(
        content,
        r'export const chatFriendsData = reactive\(\{[\s\S]*?\}\);',
        friends_js,
        'chatFriendsData',
    )

    # Replace monthFriendsData
    month_entries = "".join(
        f" {{ month: {_js_quote(m['month'])},"
        f" nickname: {_js_quote(m['nickname'])},"
        f" className: {_js_quote(m['className'])},"
        f" num: {m['num']}, avatar: {m['avatar']} }},\n"
        for m in month_friends_data
    )
    month_js = (
        "export const monthFriendsData = reactive(\n {\n month_data:[\n"
        + month_entries
        + " ]\n }\n)"
    )
    content = _replace_js(
        content,
        r'export const monthFriendsData = reactive\(\s*\{[\s\S]*?\}\s*\)',
        month_js,
        'monthFriendsData',
    )

    # Replace keywordsData
    keyword_rows = "".join(
        f" [{_js_quote(word)},{count}],\n" for word, count in keywords_list
    )
    keywords_js = (
        "export const keywordsData = reactive({\n"
        f" keyword: {_js_quote(top_keyword)},\n"
        f" keyword_num: {top_keyword_num},\n"
        " messages : [],\n"  # Clear dummy messages
        " chart_option:{},\n"
        " word_counter:[\n"
        + keyword_rows
        + " ]\n});"
    )
    content = _replace_js(
        content,
        r'export const keywordsData = reactive\(\{[\s\S]*?\}\);',
        keywords_js,
        'keywordsData',
    )

    # Replace stepData (Heatmap); the original file may hold a call such as
    # getVirtualData('2024') here, so match anything up to the semicolon.
    content = _replace_js(
        content,
        r'export var\s+stepData\s+=\s+[^;]+;',
        f'export var stepData = {heatmap_data_js};',
        'stepData',
    )

    # Update stepdescription to "Message Activity"
    step_desc_js = "\n".join([
        "export const stepdescription = {",
        "sumUp: '热络的每一天',",
        "left: {",
        "totalStepsPrefix: '年度活跃天数',",
        f"totalSteps: {len(daily_msg_counts)},",
        "distancePrefix: '累计互动',",
        f"distance: {total_sent + total_received},",
        "distanceSuffix: '次',",
        "earthPrefix: '超过了',",
        "earthRounds: 99,",
        "earthSuffix: '%的用户',",
        "},",
        "right: {",
        f"year: '{peak_year}',",
        f"month: '{peak_month}',",
        f"day: '{peak_day}',",
        "stepsPrefix: '单日最高',",
        f"steps: {peak_count},",
        "stepsSuffix: '条',",
        "message: '这一天,你们的对话仿佛没有尽头',",
        "},",
        "};",
    ])
    content = _replace_js(
        content,
        r'export const stepdescription = \{[\s\S]*?\};',
        step_desc_js,
        'stepdescription',
    )

    # Update Summary Card (wechatReportData)
    summary_entries = "".join(
        f" {{ name: {_js_quote(friend['name'], chr(39))},"
        f" avatarSrc: {friend['avatarSrc']} }},\n"
        for friend in chat_friends_data
    )
    summary_friends_js = "friends : [\n" + summary_entries + " ],"
    content = _replace_js(
        content, r'friends : \[[\s\S]*?\],', summary_friends_js, 'friends'
    )

    keyword_literal = _js_quote(top_keyword, "'")
    summary_fields = [
        (
            r"\{ label: '聊天联系人', value: \d+, unit: '人' \}",
            f"{{ label: '聊天联系人', value: {len(friend_msg_counts)}, unit: '人' }}",
            '聊天联系人',
        ),
        (
            r"\{ label: '发送消息', value: \d+, unit: '条' \}",
            f"{{ label: '发送消息', value: {total_sent}, unit: '条' }}",
            '发送消息',
        ),
        (
            r"\{ label: '收到消息', value: \d+, unit: '条' \}",
            f"{{ label: '收到消息', value: {total_received}, unit: '条' }}",
            '收到消息',
        ),
        (
            r"\{ label: '发送总字数', value: \d+, unit: '' \}",
            f"{{ label: '发送总字数', value: {total_words}, unit: '' }}",
            '发送总字数',
        ),
        (
            r"\{ label: '年度关键词', value: '[^']+' \}",
            f"{{ label: '年度关键词', value: {keyword_literal} }}",
            '年度关键词',
        ),
        (
            r"\{ label: '常用表情包', image: '[^']+' \}",
            f"{{ label: '常用表情包', image: {top_emoji_src} }}",
            '常用表情包',
        ),
    ]
    for pattern, replacement, label in summary_fields:
        content = _replace_js(content, pattern, replacement, label)

    with open(js_file, 'w', encoding='utf-8') as f:
        f.write(content)

    print("生成完成!请刷新网页查看。")
|
|
|
|
# Script entry point: regenerate the report data when the file is run directly.
if __name__ == "__main__":
    generate_report_data()
|