mirror of
https://github.com/MoonCakeMC/WeChatMsg_fix.git
synced 2026-03-20 17:27:48 +08:00
429 lines
19 KiB
Python
429 lines
19 KiB
Python
import os
|
||
import sys
|
||
import time
|
||
import re
|
||
import jieba
|
||
import collections
|
||
from collections import Counter, defaultdict
|
||
import datetime
|
||
import urllib.request
|
||
import ssl
|
||
import html
|
||
|
||
# Add project root to path
|
||
project_root = os.path.dirname(os.path.abspath(__file__))
|
||
sys.path.insert(0, project_root)
|
||
|
||
from wxManager import DatabaseConnection, MessageType
|
||
from wxManager.model import Me
|
||
from wxManager.parser.link_parser import wx_sport
|
||
|
||
# Default locations; they match the values the script has always used.
_DEFAULT_DB_DIR = r'e:\WeChatMsg\wxid_g4pshorcc0r529\db_storage'
_DEFAULT_REPORT_ROOT = r"e:\WeChatMsg\AnnualReport\report-2025\single"
_DEFAULT_DB_VERSION = 4

# JS expressions (already quoted) used when no real image could be saved.
_DEFAULT_AVATAR_SRC = "'./header/header12.webp'"
_DEFAULT_EMOJI_SRC = "'./header/header48.webp'"

# Official account whose link messages carry the daily step ranking (WeChat Sport).
_SPORT_ACCOUNT = 'gh_43f2581f6fd6'
# Sessions that are not one-to-one chats with a person.
_EXCLUDED_SUFFIXES = ('@chatroom', '@openim', '@qy_u')
_EXCLUDED_USERNAMES = frozenset({'filehelper', 'jQ4jTweaBCAFtdK'})

# Cap on the number of sent messages kept for keyword extraction (memory bound).
_MAX_KEYWORD_MESSAGES = 50000
# Numeric message type treated as a sticker/emoji message.
# NOTE(review): presumably this has a named MessageType member - confirm and use it.
_EMOJI_MESSAGE_TYPE = 47

_HAN_CHAR_RE = re.compile(r'[\u4e00-\u9fa5]')

_STOP_WORDS = frozenset({
    '的', '了', '我', '是', '你', '在', '也', '就', '不', '有', '啊', '吧', '吗', '呢', '哈', '去', '都', '那',
    '一个', '这个', '什么', '怎么', '可以', '知道', '现在', '今天', '就是', '还是', '没有', '不是', '但是',
    '因为', '所以', '如果', '那个', '觉得', '其实', '应该', '可能', '然后', '时候', '感觉', '一下', '一点',
    '真的', '已经', '只是', '出来', '起来', '看着', '看到', '自己', '我们', '你们', '他们', '图片', '表情',
    '收到', '链接', '视频', '语音', 'https', 'http', 'com', 'cn', 'www', '美团', '红包', 'net', 'org',
    'html', 'htm',
    '或者', '还有', '微信', '没事', '直接', '明天', '数据', '消息', '东西', '里面', '问号', '朋友', '人家',
    '之前', '哪个', '开始', '问题', '感情', '晚上', '意思', '学校', '手机', '不能', '一会', '这种', '宝宝',
    '不行', '我草', '谢谢', '多少', '不到', '的话', '别人', '我要', '左右', '组家',
    '警这种关系左石东西号1号0号0老狐函我草HШ李睢', '_别乐w月', '或者192',
    '捂脸', '笑哭', '呲牙', '偷笑', '调皮', '阴险', '晕', '衰', '骷髅', '敲打', '再见', '擦汗', '抠鼻',
    '鼓掌', '糗大了', '坏笑', '左哼哼', '右哼哼', '哈欠', '鄙视', '委屈', '快哭了', '亲亲', '吓', '可怜',
    '菜刀', '西瓜', '啤酒', '篮球', '乒乓', '咖啡', '饭', '猪头', '玫瑰', '凋谢', '示爱', '爱心', '心碎',
    '蛋糕', '闪电', '炸弹', '刀', '足球', '瓢虫', '便便', '月亮', '太阳', '礼物', '拥抱', '强', '弱',
    '握手', '胜利', '抱拳', '勾引', '拳头', '差劲', '爱你', 'NO', 'OK', '爱情', '飞吻', '跳跳', '发抖',
    '怄火', '转圈', '磕头', '回头', '跳绳', '挥手', '激动', '街舞', '献吻', '左太极', '右太极',
})


def _js_string(value):
    """Return *value* as a double-quoted JavaScript string literal."""
    import json

    # json.dumps escapes quotes, backslashes and control characters, so a
    # contact name can never terminate the literal or break the JS file.
    return json.dumps(str(value), ensure_ascii=False)


def _replace_section(content, pattern, replacement, label):
    """Replace every match of *pattern* in *content* with *replacement* verbatim.

    A callable is handed to ``re.subn`` so backslashes in the generated
    JavaScript are not interpreted as regex escapes or group references.
    A warning is printed when nothing matched, instead of silently leaving
    stale data in the file.
    """
    updated, hits = re.subn(pattern, lambda _match: replacement, content)
    if hits == 0:
        print(f"警告: 未在前端文件中找到 {label},已跳过")
    return updated


def _is_private_chat(username):
    """Return True when the session is not a group, official account or helper."""
    if username in _EXCLUDED_USERNAMES or username.startswith('gh_'):
        return False
    return not username.endswith(_EXCLUDED_SUFFIXES)


def _collect_step_counts(messages, start_ts, end_ts, daily_step_counts):
    """Record the highest step count per day found in WeChat Sport messages.

    Only link messages with a timestamp in ``[start_ts, end_ts)`` are parsed.
    ``daily_step_counts`` (date string -> steps) is updated in place.

    Returns:
        The number of messages whose payload could not be parsed.
    """
    failures = 0
    for msg in messages or ():
        ts = msg.timestamp
        if ts <= 0 or not (start_ts <= ts < end_ts):
            continue
        if msg.type != MessageType.LinkMessage or not getattr(msg, 'xml_content', None):
            continue
        try:
            sport_data = wx_sport(msg.xml_content)
            if not sport_data or not sport_data.get('score'):
                continue
            score_str = str(sport_data.get('score', '0')).replace(',', '')
            steps = int(score_str) if score_str.isdigit() else 0
        except Exception:
            # Best effort: one malformed message must not abort the report.
            failures += 1
            continue
        if steps > 0:
            date_str = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d')
            # Several rankings may arrive on the same day; keep the largest.
            daily_step_counts[date_str] = max(daily_step_counts[date_str], steps)
    return failures


def _is_keyword(word):
    """Return True when a segmented token is worth counting as a keyword."""
    if len(word) <= 1 or word in _STOP_WORDS:
        return False
    if any(char.isdigit() for char in word):
        return False
    # Must contain a Chinese character or be an alphabetic word longer than
    # two characters; this drops symbol noise and two-letter fragments.
    return bool(_HAN_CHAR_RE.search(word)) or (word.isalpha() and len(word) > 2)


def _count_keywords(texts, limit=50):
    """Return the *limit* most common keywords as ``(word, message_count)`` pairs.

    Each word is counted at most once per message.
    """
    word_counter = Counter()
    for msg_text in texts:
        word_counter.update({w for w in jieba.cut(msg_text) if _is_keyword(w)})
    return word_counter.most_common(limit)


def _lookup_emoji_url(db_dir, md5):
    """Look up a sticker URL by md5 in the local emoticon database, if present."""
    import sqlite3

    emoticon_db_path = os.path.join(db_dir, 'emoticon', 'emoticon.db')
    if not os.path.exists(emoticon_db_path):
        return None
    conn_emo = sqlite3.connect(emoticon_db_path)
    try:
        row = conn_emo.execute(
            "select cdn_url, thumb_url from kNonStoreEmoticonTable where md5=?",
            (md5,),
        ).fetchone()
    finally:
        # Close the connection even when the query raises.
        conn_emo.close()
    return (row[0] or row[1]) if row else None


def _resolve_top_emoji_src(db_dir, avatar_dir, emoji_counter, emoji_urls):
    """Download the most used sticker and return its quoted JS source path.

    Falls back to the default image when there is no sticker, no URL can be
    found, or the download fails.
    """
    if not emoji_counter:
        return _DEFAULT_EMOJI_SRC

    top_md5, top_count = emoji_counter.most_common(1)[0]
    print(f"最常用表情包 MD5: {top_md5} (使用 {top_count} 次)")

    try:
        cdn_url = emoji_urls.get(top_md5) or _lookup_emoji_url(db_dir, top_md5)
        if not cdn_url:
            return _DEFAULT_EMOJI_SRC

        cdn_url = html.unescape(cdn_url)
        # The URL comes from message data; never let it address local files.
        if not cdn_url.lower().startswith(('http://', 'https://')):
            raise ValueError(f"unsupported URL scheme: {cdn_url}")
        print(f"下载表情包: {cdn_url}")

        emoji_filename = f"emoji_{top_md5}.jpg"
        emoji_path = os.path.join(avatar_dir, emoji_filename)

        # NOTE(review): certificate verification is disabled here, as in the
        # original code; switch to ssl.create_default_context() if the CDN
        # serves valid certificates.
        ssl_context = ssl._create_unverified_context()
        req = urllib.request.Request(cdn_url, headers={'User-Agent': 'Mozilla/5.0'})
        with urllib.request.urlopen(req, context=ssl_context, timeout=30) as response:
            payload = response.read()
        # Write only after the download succeeded so a failure leaves no
        # empty or truncated image behind.
        with open(emoji_path, 'wb') as out_file:
            out_file.write(payload)
        return f"'./header/{emoji_filename}'"
    except Exception as e:
        print(f"获取表情包失败: {e}")
        return _DEFAULT_EMOJI_SRC


def _render_welcome(avatar_src, nickname):
    """Build the ``Welcome_data`` export."""
    return f"""export const Welcome_data = reactive({{
    avatarSrc: {avatar_src},
    nickname: {_js_string(nickname)},
    descriptionText: {{
        hello: "Hello World!",
        text1: "时光荏苒,转眼间我们又走过了一年。",
        text2: "在过去的365天里,从深夜的长谈到清晨的祝福,从好友间的调侃到工作中的忙碌……",
        text4: "这些聊天记录,是属于你的独家记忆。",
        text7: "打开报告,开启你的专属年度记忆吧!"
    }}
}});"""


def _render_chat_friends(friends):
    """Build the ``chatFriendsData`` export from the top-friend records."""
    entries = "".join(
        "        {\n"
        f"            name: {_js_string(friend['name'])},\n"
        f"            messageCount: {_js_string(friend['messageCount'])},\n"
        f"            wordCount: {_js_string(friend['wordCount'])},\n"
        f"            avatarSrc: {friend['avatarSrc']}\n"
        "        },\n"
        for friend in friends
    )
    return (
        "export const chatFriendsData = reactive({\n    chatFriends:[\n"
        + entries
        + "    ]\n});"
    )


def _render_month_friends(months):
    """Build the ``monthFriendsData`` export from the per-month winners."""
    entries = "".join(
        f"            {{ month: {_js_string(m['month'])}, "
        f"nickname: {_js_string(m['nickname'])}, "
        f"className: {_js_string(m['className'])}, "
        f"num: {m['num']}, avatar: {m['avatar']} }},\n"
        for m in months
    )
    return (
        "export const monthFriendsData = reactive(\n    {\n        month_data:[\n"
        + entries
        + "        ]\n    }\n)"
    )


def _render_keywords(top_keyword, top_keyword_num, keywords):
    """Build the ``keywordsData`` export."""
    entries = "".join(
        f"        [{_js_string(word)},{count}],\n" for word, count in keywords
    )
    return (
        "export const keywordsData = reactive({\n"
        f"    keyword: {_js_string(top_keyword)},\n"
        f"    keyword_num: {top_keyword_num},\n"
        "    messages : [],\n"  # clear the template's dummy messages
        "    chart_option:{},\n"
        "    word_counter:[\n"
        + entries
        + "    ]\n});"
    )


def _render_heatmap(daily_step_counts):
    """Build the JS array literal ``[['2025-01-01', 10000], ...]``."""
    rows = "".join(
        f"    ['{date_str}', {steps}],\n"
        for date_str, steps in daily_step_counts.items()
    )
    return "[\n" + rows + "  ]"


def _render_step_description(daily_step_counts):
    """Build the ``stepdescription`` export (yearly totals and the best day)."""
    if daily_step_counts:
        max_day_str = max(daily_step_counts, key=daily_step_counts.get)
        max_day_count = daily_step_counts[max_day_str]
    else:
        max_day_str, max_day_count = '2025-01-01', 0
    max_date = datetime.datetime.strptime(max_day_str, '%Y-%m-%d')

    total_steps = sum(daily_step_counts.values())
    distance_km = int(total_steps * 0.0007)  # roughly 0.7 m per step
    earth_rounds = round(distance_km / 40075, 2)  # Earth's circumference ~40075 km

    return f"""export const stepdescription = {{
    sumUp: '行万里路',
    left: {{
        totalStepsPrefix: '年度总步数',
        totalSteps: {total_steps},
        distancePrefix: '相当于走了',
        distance: {distance_km},
        distanceSuffix: '公里',
        earthPrefix: '绕了地球',
        earthRounds: {earth_rounds},
        earthSuffix: '圈',
    }},
    right: {{
        year: '{max_date.year}',
        month: '{max_date.month:02d}',
        day: '{max_date.day:02d}',
        stepsPrefix: '达成',
        steps: {max_day_count},
        stepsSuffix: '步',
        message: '这一天,走过的是未知的风景,留下的是每一步的精彩',
    }},
}};"""


def _render_summary_friends(friends):
    """Build the ``friends : [...]`` entry of the summary card."""
    entries = "".join(
        f"        {{ name: {_js_string(friend['name'])}, avatarSrc: {friend['avatarSrc']} }},\n"
        for friend in friends
    )
    return "friends : [\n" + entries + "    ],"


def generate_report_data(db_dir=_DEFAULT_DB_DIR,
                         report_root=_DEFAULT_REPORT_ROOT,
                         db_version=_DEFAULT_DB_VERSION):
    """Compute the 2025 annual-report statistics and patch them into getdata.js.

    The function reads every chat session from the database, keeps only
    private chats (plus the WeChat Sport account for step counts), aggregates
    message/word/hour/friend/emoji/keyword statistics for messages dated in
    2025, saves avatars and the most used sticker under
    ``<report_root>/public/header``, and rewrites the data exports in
    ``<report_root>/src/js/getdata.js`` in place.

    Args:
        db_dir: Directory holding the decrypted databases and ``info.json``.
        report_root: Root of the front-end report project.
        db_version: Database format version passed to ``DatabaseConnection``.

    Returns:
        None. Progress is reported with ``print``.
    """
    print("开始生成个性化年度报告数据...")

    # 1. Open the database and load the account owner's profile.
    conn = DatabaseConnection(db_dir, db_version)
    db = conn.get_interface()

    Me().load_from_json(os.path.join(db_dir, 'info.json'))
    self_wxid = Me().wxid
    self_name = Me().name
    print(f"当前用户: {self_name} ({self_wxid})")

    # 2. Resolve output paths.
    js_file = os.path.join(report_root, "src", "js", "getdata.js")
    avatar_dir = os.path.join(report_root, "public", "header")
    os.makedirs(avatar_dir, exist_ok=True)

    # 3. Helper: save an avatar and return its quoted JS path.
    avatar_src_cache = {}

    def get_avatar_path(wxid):
        # The same contact can appear in several rankings; fetch and write
        # each avatar only once.
        if wxid in avatar_src_cache:
            return avatar_src_cache[wxid]
        src = _DEFAULT_AVATAR_SRC
        try:
            buf = db.get_avatar_buffer(wxid)
            if buf:
                filename = f"{wxid}.jpg"
                with open(os.path.join(avatar_dir, filename), 'wb') as f:
                    f.write(buf)
                src = f"'./header/{filename}'"
        except Exception as e:
            # Best effort: fall back to the default avatar, but say why.
            print(f"保存头像失败 ({wxid}): {e}")
        avatar_src_cache[wxid] = src
        return src

    self_avatar_src = get_avatar_path(self_wxid)

    # 4. Helper: display name (remark, then nickname, then the raw wxid).
    contact_cache = {c.wxid: c for c in db.get_contacts()}

    def get_name(wxid):
        contact = contact_cache.get(wxid)
        if contact is None:
            return wxid
        return getattr(contact, 'remark', None) or getattr(contact, 'nickname', None) or wxid

    # 5. Analyze messages.
    print("正在分析消息记录 (仅统计私聊)...")

    total_sent = 0
    total_received = 0
    total_words = 0
    hour_counts = [0] * 24

    friend_msg_counts = Counter()
    friend_word_counts = Counter()
    friend_monthly_counts = defaultdict(Counter)  # '1月' -> {wxid: count}

    emoji_counter = Counter()
    emoji_urls = {}
    text_content = []
    daily_step_counts = defaultdict(int)  # '2025-01-01' -> steps

    # Half-open range covering the report year in local time.
    start_2025 = datetime.datetime(2025, 1, 1).timestamp()
    end_2025 = datetime.datetime(2026, 1, 1).timestamp()

    session_users = [s[0] for s in db.session_db.get_session()]
    total_sessions = len(session_users)
    sport_failures = 0

    for processed_count, username in enumerate(session_users, 1):
        if processed_count % 50 == 0:
            print(f"已处理 {processed_count}/{total_sessions} 个会话...")

        # The WeChat Sport account is otherwise excluded (gh_ prefix) but is
        # the only source of step data, so handle it before the filter.
        if username == _SPORT_ACCOUNT:
            sport_failures += _collect_step_counts(
                db.get_messages(username), start_2025, end_2025, daily_step_counts
            )
            continue

        if not _is_private_chat(username):
            continue

        msgs = db.get_messages(username)
        if not msgs:
            continue

        for msg in msgs:
            ts = msg.timestamp
            # Only messages dated within the report year are counted.
            if ts <= 0 or not (start_2025 <= ts < end_2025):
                continue

            dt = datetime.datetime.fromtimestamp(ts)
            hour_counts[dt.hour] += 1

            if msg.is_sender:
                total_sent += 1
            else:
                total_received += 1

            if msg.type == MessageType.Text and msg.content:
                length = len(msg.content)
                total_words += length
                friend_msg_counts[username] += 1
                friend_word_counts[username] += length
                friend_monthly_counts[f"{dt.month}月"][username] += 1

                # Keywords are built from the owner's own messages only.
                if msg.is_sender and len(text_content) < _MAX_KEYWORD_MESSAGES:
                    text_content.append(msg.content)

            elif msg.type == _EMOJI_MESSAGE_TYPE and msg.is_sender:
                md5 = getattr(msg, 'md5', None)
                if md5:
                    emoji_counter[md5] += 1
                    url = getattr(msg, 'url', None)
                    if url:
                        emoji_urls[md5] = url

    if sport_failures:
        print(f"有 {sport_failures} 条微信运动消息解析失败,已跳过")

    # 6. Derive report values.
    print("正在计算统计数据...")
    print(f"读取到 {len(daily_step_counts)} 天的步数数据,总计 {sum(daily_step_counts.values())} 步")

    # Days elapsed in 2025: up to today while still in 2025, the full year after.
    now = datetime.datetime.now()
    if now.year == 2025:
        days_in_year = (now - datetime.datetime(2025, 1, 1)).days + 1
    elif now.year > 2025:
        days_in_year = 365
    else:
        days_in_year = 1  # running before the report year; nothing to count yet

    chat_friends_data = [
        {
            'name': get_name(wxid),
            'messageCount': f"{count}条消息",
            'wordCount': f"{friend_word_counts[wxid]}字",
            'avatarSrc': get_avatar_path(wxid),
        }
        for wxid, count in friend_msg_counts.most_common(5)
    ]

    month_friends_data = []
    for month in range(1, 13):
        m_key = f"{month}月"
        if m_key not in friend_monthly_counts:
            continue
        wxid, num = max(friend_monthly_counts[m_key].items(), key=lambda item: item[1])
        month_friends_data.append({
            'month': m_key,
            'nickname': get_name(wxid),
            'className': 'passion',
            'num': num,
            'avatar': get_avatar_path(wxid),
        })

    print("正在生成关键词...")
    keywords_list = _count_keywords(text_content)
    if keywords_list:
        top_keyword, top_keyword_num = keywords_list[0]
    else:
        top_keyword, top_keyword_num = "无", 0

    top_emoji_src = _resolve_top_emoji_src(db_dir, avatar_dir, emoji_counter, emoji_urls)

    # 7. Patch the front-end data file.
    print(f"正在更新前端文件: {js_file}")

    with open(js_file, 'r', encoding='utf-8') as f:
        content = f.read()

    num_friends = len(friend_msg_counts)
    stats_js = (
        "export var statsData = {\n"
        f"    daysInWeChat: {days_in_year},\n"
        f"    numOfFriends: {num_friends},\n"
        f"    messagesSent: {total_sent},\n"
        f"    messagesReceived: {total_received},\n"
        f"    totalWords: {total_words}\n"
        "};"
    )

    # (pattern, replacement, label) in the order the sections are rewritten.
    replacements = [
        (r'export const Welcome_data = reactive\(\{[\s\S]*?\}\);',
         _render_welcome(self_avatar_src, self_name), 'Welcome_data'),
        (r'export var statsData = \{[^}]+\};', stats_js, 'statsData'),
        (r'export var yAxisData = \[[^\]]+\];',
         f'export var yAxisData = {hour_counts};', 'yAxisData'),
        (r'export const chatFriendsData = reactive\(\{[\s\S]*?\}\);',
         _render_chat_friends(chat_friends_data), 'chatFriendsData'),
        (r'export const monthFriendsData = reactive\(\s*\{[\s\S]*?\}\s*\)',
         _render_month_friends(month_friends_data), 'monthFriendsData'),
        (r'export const keywordsData = reactive\(\{[\s\S]*?\}\);',
         _render_keywords(top_keyword, top_keyword_num, keywords_list), 'keywordsData'),
        # The template may still hold a generator call such as getVirtualData('2024').
        (r'export var\s+stepData\s+=\s+[^;]+;',
         f'export var stepData = {_render_heatmap(daily_step_counts)};', 'stepData'),
        (r'export const stepdescription = \{[\s\S]*?\};',
         _render_step_description(daily_step_counts), 'stepdescription'),
        # Summary card (wechatReportData).
        (r'friends : \[[\s\S]*?\],',
         _render_summary_friends(chat_friends_data), 'friends'),
        (r"\{ label: '聊天联系人', value: \d+, unit: '人' \}",
         f"{{ label: '聊天联系人', value: {num_friends}, unit: '人' }}", '聊天联系人'),
        (r"\{ label: '发送消息', value: \d+, unit: '条' \}",
         f"{{ label: '发送消息', value: {total_sent}, unit: '条' }}", '发送消息'),
        (r"\{ label: '收到消息', value: \d+, unit: '条' \}",
         f"{{ label: '收到消息', value: {total_received}, unit: '条' }}", '收到消息'),
        (r"\{ label: '发送总字数', value: \d+, unit: '' \}",
         f"{{ label: '发送总字数', value: {total_words}, unit: '' }}", '发送总字数'),
        # Single quotes are kept so the same pattern still matches on the next run.
        (r"\{ label: '年度关键词', value: '[^']+' \}",
         f"{{ label: '年度关键词', value: '{top_keyword}' }}", '年度关键词'),
        (r"\{ label: '常用表情包', image: '[^']+' \}",
         f"{{ label: '常用表情包', image: {top_emoji_src} }}", '常用表情包'),
    ]
    for pattern, replacement, label in replacements:
        content = _replace_section(content, pattern, replacement, label)

    with open(js_file, 'w', encoding='utf-8') as f:
        f.write(content)

    print("生成完成!请刷新网页查看。")
|
||
|
||
# Script entry point: build the report only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    generate_report_data()
|