重写架构,支持微信4.0

This commit is contained in:
SiYuan
2025-03-28 21:29:18 +08:00
parent fc1e2fa7a5
commit 6535ed011c
388 changed files with 20483 additions and 39576 deletions
+19
View File
@@ -0,0 +1,19 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2024/12/5 22:46
@Author : SiYuan
@Email : 863909694@qq.com
@File : MemoTrace-__init__.py.py
@Description :
"""
from .message import MessageDB
from .contact import ContactDB
from .session import SessionDB
from .head_image import HeadImageDB
from .hardlink import HardLinkDB
if __name__ == '__main__':
pass
+311
View File
@@ -0,0 +1,311 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2025/2/28 0:40
@Author : SiYuan
@Email : 863909694@qq.com
@File : wxManager-biz_message.py
@Description :
"""
import concurrent
import hashlib
import os
import shutil
import threading
from concurrent.futures import ThreadPoolExecutor
from datetime import date, datetime
from typing import Tuple
from wxManager import MessageType
from wxManager.merge import increase_data, increase_update_data
from wxManager.model.db_model import DataBaseBase
def convert_to_timestamp_(time_input) -> int:
if isinstance(time_input, (int, float)):
# 如果输入是时间戳,直接返回
return int(time_input)
elif isinstance(time_input, str):
# 如果输入是格式化的时间字符串,将其转换为时间戳
try:
dt_object = datetime.strptime(time_input, '%Y-%m-%d %H:%M:%S')
return int(dt_object.timestamp())
except ValueError:
# 如果转换失败,可能是其他格式的字符串,可以根据需要添加更多的处理逻辑
print("Error: Unsupported date format")
return -1
elif isinstance(time_input, date):
# 如果输入是datetime.date对象,将其转换为时间戳
dt_object = datetime.combine(time_input, datetime.min.time())
return int(dt_object.timestamp())
else:
print("Error: Unsupported input type")
return -1
def convert_to_timestamp(time_range) -> Tuple[int, int]:
"""
将时间转换成时间戳
@param time_range:
@return:
"""
if not time_range:
return 0, 0
else:
return convert_to_timestamp_(time_range[0]), convert_to_timestamp_(time_range[1])
def get_local_type(type_: MessageType):
return type_
class BizMessageDB(DataBaseBase):
columns = (
"local_id,server_id,local_type,sort_seq,Name2Id.user_name as sender_username,create_time,strftime('%Y-%m-%d %H:%M:%S',"
"create_time,'unixepoch','localtime') as StrTime,status,upload_status,server_seq,origin_source,source,"
"message_content,compress_content")
def get_messages(self):
pass
def table_exists(self, cursor, table_name):
# 查询 sqlite_master 系统表,判断表是否存在
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name=?;", (table_name,))
result = cursor.fetchone()
# 如果结果不为空,表存在;否则表不存在
return result
def _get_messages_by_username(self, cursor, username: str,
time_range: Tuple[int | float | str | date, int | float | str | date] = None, ):
table_name = f'Msg_{hashlib.md5(username.encode("utf-8")).hexdigest()}'
if not self.table_exists(cursor, table_name):
return None
if time_range:
start_time, end_time = convert_to_timestamp(time_range)
sql = f'''
select {BizMessageDB.columns}
from {table_name} as msg
join Name2Id on msg.real_sender_id = Name2Id.rowid
{'where create_time>' + str(start_time) + ' AND create_time<' + str(end_time) if time_range else ''}
order by sort_seq
'''
cursor.execute(sql)
result = cursor.fetchall()
if result:
return result
else:
return None
def get_messages_by_username(self, username: str,
time_range: Tuple[int | float | str | date, int | float | str | date] = None, ):
with concurrent.futures.ThreadPoolExecutor() as executor:
# 创建一个任务列表
futures = [
executor.submit(self._get_messages_by_username, db.cursor(), username, time_range)
for db in self.DB
]
# 等待所有任务完成,并获取结果
results = []
for future in concurrent.futures.as_completed(futures):
r1 = future.result()
if r1:
# results.append(future.result())
results.extend(r1)
return results
results = []
# for db in self.DB:
# cursor = db.cursor()
# yield self._get_messages_by_num(cursor, username, start_sort_seq, msg_num)
lock = threading.Lock() # 锁,用于确保线程安全地写入 results
def task(db):
"""
每个线程执行的任务,获取某个数据库实例中的查询结果。
"""
cursor = db.cursor()
try:
data = self._get_messages_by_username(cursor, username, time_range)
with lock: # 确保对 results 的操作是线程安全的
results.append(data)
finally:
cursor.close()
# 使用线程池
with ThreadPoolExecutor(max_workers=len(self.DB)) as executor:
executor.map(task, self.DB)
self.commit()
return results
def _get_messages_by_num(self, cursor, username, start_sort_seq, msg_num):
table_name = f'Msg_{hashlib.md5(username.encode("utf-8")).hexdigest()}'
if not self.table_exists(cursor, table_name):
return []
sql = f'''
select {BizMessageDB.columns}
from {table_name} as msg
join Name2Id on msg.real_sender_id = Name2Id.rowid
where sort_seq < ?
order by sort_seq desc
limit ?
'''
cursor.execute(sql, [start_sort_seq, msg_num])
result = cursor.fetchall()
if result:
return result
else:
return []
def get_message_by_server_id(self, username, server_id):
"""
获取小于start_sort_seq的msg_num个消息
@param username:
@param server_id:
@return: messages, 最后一条消息的start_sort_seq
"""
table_name = f'Msg_{hashlib.md5(username.encode("utf-8")).hexdigest()}'
sql = f'''
select {BizMessageDB.columns}
from {table_name} as msg
join Name2Id on msg.real_sender_id = Name2Id.rowid
where server_id = ?
'''
for db in self.DB:
cursor = db.cursor()
if not self.table_exists(cursor, table_name):
continue
cursor.execute(sql, [server_id])
result = cursor.fetchone()
if result:
return result
def get_messages_by_num(self, username, start_sort_seq, msg_num=20):
results = []
# for db in self.DB:
# cursor = db.cursor()
# yield self._get_messages_by_num(cursor, username, start_sort_seq, msg_num)
lock = threading.Lock() # 锁,用于确保线程安全地写入 results
def task(db):
"""
每个线程执行的任务,获取某个数据库实例中的查询结果。
"""
cursor = db.cursor()
try:
data = self._get_messages_by_num(cursor, username, start_sort_seq, msg_num)
with lock: # 确保对 results 的操作是线程安全的
results.append(data)
finally:
cursor.close()
# 使用线程池
with ThreadPoolExecutor(max_workers=len(self.DB)) as executor:
executor.map(task, self.DB)
self.commit()
return results
def _get_messages_calendar(self, cursor, username):
"""
获取某个人的聊天日历列表
@param username_:
@return:
"""
table_name = f'Msg_{hashlib.md5(username.encode("utf-8")).hexdigest()}'
if not self.table_exists(cursor, table_name):
return None
sql = f'''SELECT DISTINCT strftime('%Y-%m-%d',create_time,'unixepoch','localtime') AS date
from {table_name} as msg
ORDER BY date desc;
'''
cursor.execute(sql)
result = cursor.fetchall()
return (data[0] for data in result)
def get_messages_calendar(self, username):
res = []
for db in self.DB:
r1 = self._get_messages_calendar(db.cursor(), username)
if r1:
res.extend(r1)
res.sort()
return res
def _get_messages_by_type(self, cursor, username: str, type_: MessageType,
time_range: Tuple[int | float | str | date, int | float | str | date] = None, ):
table_name = f'Msg_{hashlib.md5(username.encode("utf-8")).hexdigest()}'
if not self.table_exists(cursor, table_name):
return None
if time_range:
start_time, end_time = convert_to_timestamp(time_range)
local_type = get_local_type(type_)
sql = f'''
select {BizMessageDB.columns}
from {table_name} as msg
join Name2Id on msg.real_sender_id = Name2Id.rowid
where local_type=? {'and create_time>' + str(start_time) + ' AND create_time<' + str(end_time) if time_range else ''}
order by sort_seq
'''
cursor.execute(sql, [local_type])
result = cursor.fetchall()
if result:
return result
else:
return None
def get_messages_by_type(self, username: str, type_: MessageType,
time_range: Tuple[int | float | str | date, int | float | str | date] = None, ):
with concurrent.futures.ThreadPoolExecutor() as executor:
# 创建一个任务列表
futures = [
executor.submit(self._get_messages_by_type, db.cursor(), username, type_, time_range)
for db in self.DB
]
# 等待所有任务完成,并获取结果
results = []
for future in concurrent.futures.as_completed(futures):
r1 = future.result()
if r1:
# results.append(future.result())
results.extend(r1)
return results
def merge(self, db_file_name):
def task_(db_path, cursor, db):
"""
每个线程执行的任务,获取某个数据库实例中的查询结果。
"""
increase_data(db_path, cursor, db, 'Name2Id', 'user_name')
increase_update_data(db_path, cursor, db, 'TimeStamp', 'timestamp')
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
result = cursor.fetchall()
# print(result)
if result:
for row in result:
table_name = row[0]
if table_name.startswith('Msg'):
increase_data(db_path, cursor, db, table_name, 'server_id', exclude_first_column=True)
tasks = []
for i in range(100):
db_path = db_file_name.replace('0', f'{i}')
if os.path.exists(db_path):
# print('初始化数据库:', db_path)
file_name = os.path.basename(db_path)
if file_name in self.db_file_name:
index = self.db_file_name.index(file_name)
db = self.DB[index]
cursor = db.cursor()
task_(db_path, cursor, db)
tasks.append([db_path, cursor, db])
else:
shutil.copy(db_path, os.path.join(self.db_dir, 'message'))
# print(tasks)
# 使用线程池 (没有加快合并速度)
# with ThreadPoolExecutor(max_workers=len(tasks)) as executor:
# executor.map(lambda args: task_(*args), tasks)
self.commit()
print(len(tasks))
+152
View File
@@ -0,0 +1,152 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2024/12/5 22:47
@Author : SiYuan
@Email : 863909694@qq.com
@File : MemoTrace-contact.py
@Description :
"""
import os
import traceback
from wxManager.merge import increase_update_data, increase_data
from wxManager.model.db_model import DataBaseBase
class ContactDB(DataBaseBase):
def create_index(self):
sql = "CREATE INDEX IF NOT EXISTS contact_username ON contact(username);"
try:
cursor = self.DB.cursor()
cursor.execute(sql)
self.commit()
cursor.close()
return True
except:
return False
def get_label_by_id(self, label_id) -> str:
sql = '''
select label_name_ from contact_label
where label_id_ = ?
'''
try:
cursor = self.DB.cursor()
cursor.execute(sql, [label_id])
result = cursor.fetchone()
if result:
return result[0]
else:
return ''
except:
return ''
def get_labels(self, label_id_list) -> str:
if not label_id_list:
return ''
return ','.join(map(self.get_label_by_id, label_id_list.strip(',').split(',')))
def get_contacts(self):
if not self.open_flag:
return []
self.create_index()
'''
@return:
a[0]:username
a[1]:alias
a[2]:local_type
a[3]:flag
a[4]:remark
a[5]:nick_name
a[6]:pin_yin_initial
a[7]:remark_pin_yin_initial
a[8]:small_head_url
a[9]:big_head_url
a[10]:extra_buffer
a[11]:head_img_md5
a[12]:
a[13]:
a[14]:
'''
sql = '''
SELECT username, alias, local_type, flag, remark, nick_name, pin_yin_initial, remark_pin_yin_initial, small_head_url, big_head_url,extra_buffer,head_img_md5,chat_room_notify,is_in_chat_room,description,chat_room_type
FROM contact
WHERE (local_type=1 or local_type=2 or local_type=5)
ORDER BY
CASE
WHEN remark_quan_pin = '' THEN quan_pin
ELSE remark_quan_pin
END ASC
'''
self.cursor.execute(sql)
results = self.cursor.fetchall()
self.DB.commit()
return results
def get_contact_by_username(self, username):
sql = '''
SELECT username, alias, local_type,flag, remark, nick_name, pin_yin_initial, remark_pin_yin_initial, small_head_url, big_head_url,extra_buffer,head_img_md5,chat_room_notify,is_in_chat_room,description,chat_room_type
FROM contact
WHERE username=?
'''
cursor = self.DB.cursor()
cursor.execute(sql, [username])
result = cursor.fetchone()
cursor.close()
# self.commit()
if result:
return result
return None
def get_chatroom_info(self, username):
sql = '''
select id,ext_buffer,username,owner
from chat_room
where username=?
'''
cursor = self.DB.cursor()
cursor.execute(sql, [username])
result = cursor.fetchone()
cursor.close()
if result:
return result
return None
def set_remark(self, username, remark):
if not remark:
return False
sql = '''
update contact
set remark=?
where username=?
'''
cursor = self.DB.cursor()
cursor.execute(sql, [remark, username])
cursor.close()
self.commit()
return True
def merge(self, db_path):
if not (os.path.exists(db_path) or os.path.isfile(db_path)):
print(f'{db_path} 不存在')
return
try:
# 获取列名
increase_update_data(db_path, self.cursor, self.DB, 'biz_info', 'username')
increase_update_data(db_path, self.cursor, self.DB, 'chat_room', 'username')
increase_update_data(db_path, self.cursor, self.DB, 'chat_room_info_detail', 'room_id_')
increase_update_data(db_path, self.cursor, self.DB, 'contact', 'username')
increase_update_data(db_path, self.cursor, self.DB, 'contact_label', 'label_id_')
increase_update_data(db_path, self.cursor, self.DB, 'openim_acct_type', 'lang_id')
increase_update_data(db_path, self.cursor, self.DB, 'openim_appid', 'lang_id')
# increase_update_data(db_path, self.cursor, self.DB, 'chat_room_member', 'room_id_')
increase_data(db_path, self.cursor, self.DB, 'name2id', 'username')
except:
print(f"数据库操作错误: {traceback.format_exc()}")
self.DB.rollback()
if __name__ == '__main__':
pass
+55
View File
@@ -0,0 +1,55 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2024/12/12 18:10
@Author : SiYuan
@Email : 863909694@qq.com
@File : MemoTrace-emotion.py
@Description :
"""
import os
import traceback
from wxManager.merge import increase_data
from wxManager.model import DataBaseBase
class EmotionDB(DataBaseBase):
def get_emoji_url(self, md5, thumb=False):
emoji_info = self._get_emoji_info(md5)
if emoji_info:
return emoji_info[1] if thumb else emoji_info[2]
else:
return ''
def _get_emoji_info(self, md5):
sql = '''
select aes_key,thumb_url,cdn_url
from kNonStoreEmoticonTable
where md5=?
'''
cursor = self.DB.cursor()
cursor.execute(sql, [md5])
result = cursor.fetchone()
if result:
return result
else:
return None
def merge(self, db_path):
if not (os.path.exists(db_path) or os.path.isfile(db_path)):
print(f'{db_path} 不存在')
return
try:
# 获取列名
increase_data(db_path, self.cursor, self.DB, 'kNonStoreEmoticonTable', 'md5')
increase_data(db_path, self.cursor, self.DB, 'kStoreEmoticonCaptionsTable', 'md5_')
increase_data(db_path, self.cursor, self.DB, 'kStoreEmoticonFilesTable', 'md5_')
increase_data(db_path, self.cursor, self.DB, 'kStoreEmoticonPackageTable', 'package_id_')
except:
print(f"数据库操作错误: {traceback.format_exc()}")
self.DB.rollback()
if __name__ == '__main__':
pass
+277
View File
@@ -0,0 +1,277 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2024/12/8 17:30
@Author : SiYuan
@Email : 863909694@qq.com
@File : MemoTrace-hardlink.py
@Description :
"""
import hashlib
import os
import traceback
from lxml import etree
from wxManager import Me
from wxManager.merge import increase_data
from wxManager.model.db_model import DataBaseBase
from wxManager.log import logger
from wxManager.model.message import Message
from wxManager.parser.util.protocbuf import file_info_pb2
from google.protobuf.json_format import MessageToJson, MessageToDict
image_root_path = "msg\\attach\\"
video_root_path = "msg\\video\\"
file_root_path = "msg\\file\\"
def get_md5_from_xml(content, type_="img"):
if not content:
return None
try:
content = content.strip('null:').strip().replace(' length="0" ', ' ') # 哪个天才在xml里写两个一样的字段 length="0"
# 解析XML
parser = etree.XMLParser(recover=True)
root = etree.fromstring(content, parser=parser)
if type_ == "img":
# 提取md5的值
md5_value = root.find(".//img").get("md5")
elif type_ == "video":
md5_value = root.find(".//videomsg").get("md5")
else:
md5_value = None
# print(md5_value)
return md5_value
except:
logger.error(traceback.format_exc())
logger.error(content)
return None
class HardLinkDB(DataBaseBase):
def get_image_path(self):
pass
def create_index(self):
sql = "CREATE INDEX IF NOT EXISTS image_hardlink_info_v3_md5 ON image_hardlink_info_v3(md5);"
try:
cursor = self.DB.cursor()
cursor.execute(sql)
self.commit()
cursor.close()
except:
pass
sql = "CREATE INDEX IF NOT EXISTS video_hardlink_info_v3_md5 ON video_hardlink_info_v3(md5);"
try:
cursor = self.DB.cursor()
cursor.execute(sql)
self.commit()
cursor.close()
except:
pass
sql = "CREATE INDEX IF NOT EXISTS file_hardlink_info_v3_md5 ON file_hardlink_info_v3(md5);"
try:
cursor = self.DB.cursor()
cursor.execute(sql)
self.commit()
cursor.close()
except:
pass
def get_image_by_md5(self, md5: str):
sql = '''
select file_size,type,file_name,dir2id.username,dir2id2.username,_rowid_,modify_time,extra_buffer
from image_hardlink_info_v3
join dir2id on dir2id.rowid = dir1
join dir2id as dir2id2 on dir2id2.rowid=dir2
where md5=?
'''
cursor = self.DB.cursor()
cursor.execute(sql, [md5])
result = cursor.fetchall()
if result:
return result[0]
return None
def get_video_by_md5(self, md5: str):
sql = '''
SELECT file_size, type, file_name, dir2id.username, dir2id2.username, _rowid_, modify_time, extra_buffer
FROM video_hardlink_info_v3
JOIN dir2id ON dir2id.rowid = dir1
LEFT JOIN dir2id AS dir2id2 ON dir2id2.rowid = dir2 AND dir2 != 0
WHERE md5 = ?
'''
cursor = self.DB.cursor()
cursor.execute(sql, [md5])
result = cursor.fetchall()
if result:
return result[0]
return None
def get_file_by_md5(self, md5: str):
sql = '''
select file_size,type,file_name,dir2id.username,dir2id2.username,_rowid_,modify_time,extra_buffer
from file_hardlink_info_v3
join dir2id on dir2id.rowid = dir1
LEFT JOIN dir2id AS dir2id2 ON dir2id2.rowid = dir2 AND dir2 != 0
where md5=?
'''
cursor = self.DB.cursor()
cursor.execute(sql, [md5])
result = cursor.fetchall()
if result:
return result[0]
return None
def get_video(self, md5, thumb=False):
video_info = self.get_video_by_md5(md5)
if video_info:
type_ = video_info[1]
if type_ == 5:
dir1 = video_info[3]
dir2 = video_info[4]
extra_buffer = video_info[7]
# 创建顶级消息对象
message = file_info_pb2.FileInfoData()
# 解析二进制数据
message.ParseFromString(extra_buffer)
extra_dic = MessageToDict(message)
dir3 = extra_dic.get('dir3', '')
file_name = video_info[2]
result = os.path.join(video_root_path, dir1, dir2, 'Rec', dir3, 'V', file_name)
else:
dir1 = video_info[3]
data_image = video_info[2].split('.')[0] + '_thumb.jpg' if thumb else video_info[2]
dat_image = os.path.join(video_root_path, dir1, data_image)
result = dat_image
return result
return ''
def get_image_thumb(self, message: Message, talker_username):
"""
@param message:
@param talker_username: 聊天对象的wxid
@return:
"""
dir1 = hashlib.md5(talker_username.encode('utf-8')).hexdigest()
str_time = message.str_time
dir2 = str_time[:7] # 2024-12
dir0 = "Img"
local_id = message.local_id
create_time = message.timestamp
data_image = f'{message.file_name}_t.dat' if message.file_name else f'{local_id}_{create_time}_t.dat'
return os.path.join(image_root_path, dir1, dir2, dir0, data_image)
def get_image_by_time(self, message: Message, talker_username):
"""
@param message:
@param talker_username: 聊天对象的wxid
@return:
"""
dir1 = hashlib.md5(talker_username.encode('utf-8')).hexdigest()
str_time = message.str_time
dir2 = str_time[:7] # 2024-12
dir0 = "Img"
local_id = message.local_id
create_time = message.timestamp
data_image = f'{message.file_name}_W.dat' if message.file_name else f'{local_id}_{create_time}_W.dat'
path1 = os.path.join(image_root_path, dir1, dir2, dir0, data_image)
if os.path.exists(os.path.join(Me().wx_dir, path1)):
return path1
else:
data_image = f'{message.file_name}.dat' if message.file_name else f'{local_id}_{create_time}.dat'
path1 = os.path.join(image_root_path, dir1, dir2, dir0, data_image)
return path1
def get_image(self, content, message, up_dir="", md5=None, thumb=False, talker_username='') -> str:
"""
@param content: image xml
@param message:
@param up_dir:
@param md5: image的md5
@param thumb: 是否是缩略图
@param talker_username: 聊天对象的wxid
@return:
"""
result = '.'
self.create_index()
if thumb:
return self.get_image_thumb(message, talker_username)
else:
result = self.get_image_by_time(message, talker_username)
if os.path.exists(os.path.join(Me().wx_dir, result)):
return result
if not md5:
md5 = get_md5_from_xml(content)
if md5:
imginfo = self.get_image_by_md5(md5)
if imginfo:
type_ = imginfo[1]
if type_ == 4:
dir1 = imginfo[3]
dir2 = imginfo[4]
extra_buffer = imginfo[7]
# 创建顶级消息对象
message = file_info_pb2.FileInfoData()
# 解析二进制数据
message.ParseFromString(extra_buffer)
extra_dic = MessageToDict(message)
dir3 = extra_dic.get('dir3', '')
file_name = imginfo[2]
result = os.path.join(image_root_path, dir1, dir2, 'Rec', dir3, 'Img', file_name)
else:
dir1 = imginfo[3]
dir2 = imginfo[4]
data_image = imginfo[2]
dir0 = "Img"
dat_image = os.path.join(image_root_path, dir1, dir2, dir0, data_image)
result = dat_image
else:
result = self.get_image_thumb(message, talker_username)
else:
result = self.get_image_by_time(message, talker_username)
return result
def get_file(self, md5):
file_info = self.get_file_by_md5(md5)
if file_info:
type_ = file_info[1]
if type_ == 6:
dir1 = file_info[3]
dir2 = file_info[4]
extra_buffer = file_info[7]
# 创建顶级消息对象
message = file_info_pb2.FileInfoData()
# 解析二进制数据
message.ParseFromString(extra_buffer)
extra_dic = MessageToDict(message)
dir3 = extra_dic.get('dir3', '')
file_name = file_info[2]
filepath = os.path.join(image_root_path, dir1, dir2, dir3, file_name)
else:
dir1 = file_info[3]
filename = file_info[2]
filepath = os.path.join(file_root_path, dir1, filename)
return filepath
return ''
def merge(self, db_path):
if not (os.path.exists(db_path) or os.path.isfile(db_path)):
print(f'{db_path} 不存在')
return
try:
# 获取列名
increase_data(db_path, self.cursor, self.DB, 'file_hardlink_info_v3', 'md5')
increase_data(db_path, self.cursor, self.DB, 'image_hardlink_info_v3', 'md5')
increase_data(db_path, self.cursor, self.DB, 'video_hardlink_info_v3', 'md5')
increase_data(db_path, self.cursor, self.DB, 'dir2id', 'username')
except:
print(f"数据库操作错误: {traceback.format_exc()}")
self.DB.rollback()
if __name__ == '__main__':
pass
+91
View File
@@ -0,0 +1,91 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2024/12/5 23:35
@Author : SiYuan
@Email : 863909694@qq.com
@File : MemoTrace-head_image.py
@Description :
"""
import hashlib
import io
import os
import time
import traceback
from PIL import Image
from wxManager.merge import increase_update_data
from wxManager.model.db_model import DataBaseBase
from wxManager.log import logger
class HeadImageDB(DataBaseBase):
def get_avatar_buffer(self, username):
if not self.open_flag:
return b''
sql = '''
select image_buffer
from head_image
where username = ?
'''
cursor = self.DB.cursor()
cursor.execute(sql, [username])
result = cursor.fetchall()
cursor.close()
self.DB.commit()
if result:
return result[0][0]
else:
return b''
def set_avatar_buffer(self, username, img_path):
try:
# 打开图片并缩放
with Image.open(img_path) as img:
img = img.resize((128, 128))
# 将图片转换为二进制格式
img_byte_arr = io.BytesIO()
img.save(img_byte_arr, format='PNG') # 可以根据需要更改格式
img_binary = img_byte_arr.getvalue()
md5_hash = hashlib.md5()
md5_hash.update(img_binary)
update_sql = '''
UPDATE head_image
SET update_time = ?,image_buffer=?,md5=?
WHERE username = ?
'''
cursor = self.DB.cursor()
cursor.execute(update_sql, [int(time.time()), img_binary, username, md5_hash.hexdigest()])
# 检查是否有行被更新
if cursor.rowcount == 0:
# 如果没有更新,则插入新记录
insert_sql = '''
INSERT INTO head_image (username,md5, image_buffer,update_time)
VALUES (?, ?,?,?)
'''
cursor.execute(insert_sql, [username, md5_hash.hexdigest(), int(time.time()), img_binary])
cursor.close()
self.commit() # 提交更改
except:
logger.error(traceback.format_exc())
return False
return True
def merge(self, db_path):
if not (os.path.exists(db_path) or os.path.isfile(db_path)):
print(f'{db_path} 不存在')
return
try:
# 获取列名
increase_update_data(db_path, self.cursor, self.DB, 'head_image', 'username')
except:
print(f"数据库操作错误: {traceback.format_exc()}")
self.DB.rollback()
if __name__ == '__main__':
pass
+116
View File
@@ -0,0 +1,116 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2024/12/12 17:06
@Author : SiYuan
@Email : 863909694@qq.com
@File : MemoTrace-media.py
@Description :
"""
import os
import subprocess
import sys
import traceback
from wxManager.merge import increase_update_data, increase_data
from wxManager.model import DataBaseBase
from wxManager.log import logger
def get_ffmpeg_path():
# 获取打包后的资源目录
resource_dir = getattr(sys, '_MEIPASS', os.path.abspath(os.path.dirname(__file__)))
# 构建 FFmpeg 可执行文件的路径
ffmpeg_path = os.path.join(resource_dir, 'app', 'resources', 'data', 'ffmpeg.exe')
return ffmpeg_path
class MediaDB(DataBaseBase):
def get_media_buffer(self, server_id) -> bytes:
sql = '''
select voice_data
from VoiceInfo
where svr_id = ?
'''
for db in self.DB:
cursor = db.cursor()
cursor.execute(sql, [server_id])
result = cursor.fetchone()
if result:
return result[0]
return b''
def get_audio_path(self, server_id, output_dir, filename=''):
if filename:
return f'{output_dir}/{filename}.mp3'
else:
return f'{output_dir}/{server_id}.mp3'
def get_audio(self, server_id, output_dir, filename=''):
if not filename:
filename = server_id
silk_path = f"{output_dir}/{filename}.silk"
pcm_path = f"{output_dir}/{filename}.pcm"
mp3_path = f"{output_dir}/{filename}.mp3"
if os.path.exists(mp3_path):
return mp3_path
buf = self.get_media_buffer(server_id)
if not buf:
return ''
with open(silk_path, "wb") as f:
f.write(buf)
# open(silk_path, "wb").write()
try:
decode(silk_path, pcm_path, 44100)
# 调用系统上的 ffmpeg 可执行文件
# 获取 FFmpeg 可执行文件的路径
ffmpeg_path = get_ffmpeg_path()
# # 调用 FFmpeg
if os.path.exists(ffmpeg_path):
cmd = f'''"{ffmpeg_path}" -loglevel quiet -y -f s16le -i "{pcm_path}" -ar 44100 -ac 1 "{mp3_path}"'''
# system(cmd)
# 使用subprocess.run()执行命令
subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
else:
# 源码运行的时候下面的有效
# 这里不知道怎么捕捉异常
cmd = f'''"{os.path.join(os.getcwd(), 'app', 'resources', 'data', 'ffmpeg.exe')}" -loglevel quiet -y -f s16le -i "{pcm_path}" -ar 44100 -ac 1 "{mp3_path}"'''
# system(cmd)
# 使用subprocess.run()执行命令
subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if os.path.exists(silk_path):
os.remove(silk_path)
if os.path.exists(pcm_path):
os.remove(pcm_path)
except Exception as e:
print(f"Error: {e}")
logger.error(f'语音错误\n{traceback.format_exc()}')
cmd = f'''"{os.path.join(os.getcwd(), 'app', 'resources', 'data', 'ffmpeg.exe')}" -loglevel quiet -y -f s16le -i "{pcm_path}" -ar 44100 -ac 1 "{mp3_path}"'''
# system(cmd)
# 使用subprocess.run()执行命令
subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
finally:
return mp3_path
def merge(self, db_path):
# todo 判断数据库对应情况
if not (os.path.exists(db_path) or os.path.isfile(db_path)):
print(f'{db_path} 不存在')
return
for db in self.DB:
cursor = db.cursor()
try:
# 获取列名
increase_data(db_path, cursor, db, 'VoiceInfo', 'svr_id')
increase_data(db_path, cursor, db, 'Name2Id', 'user_name')
increase_update_data(db_path, cursor, db, 'Timestamp', 'timestamp')
except:
print(f"数据库操作错误: {traceback.format_exc()}")
db.rollback()
if __name__ == '__main__':
pass
+316
View File
@@ -0,0 +1,316 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2024/12/6 23:07
@Author : SiYuan
@Email : 863909694@qq.com
@File : MemoTrace-message.py
@Description :
"""
import concurrent
import hashlib
import os
import shutil
import threading
import traceback
from concurrent.futures import ThreadPoolExecutor
from datetime import date, datetime
from typing import Tuple
from wxManager import MessageType
from wxManager.merge import increase_data, increase_update_data
from wxManager.model.db_model import DataBaseBase
def convert_to_timestamp_(time_input) -> int:
if isinstance(time_input, (int, float)):
# 如果输入是时间戳,直接返回
return int(time_input)
elif isinstance(time_input, str):
# 如果输入是格式化的时间字符串,将其转换为时间戳
try:
dt_object = datetime.strptime(time_input, '%Y-%m-%d %H:%M:%S')
return int(dt_object.timestamp())
except ValueError:
# 如果转换失败,可能是其他格式的字符串,可以根据需要添加更多的处理逻辑
print("Error: Unsupported date format")
return -1
elif isinstance(time_input, date):
# 如果输入是datetime.date对象,将其转换为时间戳
dt_object = datetime.combine(time_input, datetime.min.time())
return int(dt_object.timestamp())
else:
print("Error: Unsupported input type")
return -1
def convert_to_timestamp(time_range) -> Tuple[int, int]:
"""
将时间转换成时间戳
@param time_range:
@return:
"""
if not time_range:
return 0, 0
else:
return convert_to_timestamp_(time_range[0]), convert_to_timestamp_(time_range[1])
def get_local_type(type_: MessageType):
return type_
class MessageDB(DataBaseBase):
columns = (
"local_id,server_id,local_type,sort_seq,Name2Id.user_name as sender_username,create_time,strftime('%Y-%m-%d %H:%M:%S',"
"create_time,'unixepoch','localtime') as StrTime,status,upload_status,server_seq,origin_source,source,"
"message_content,compress_content,packed_info_data")
def get_messages(self):
pass
def table_exists(self, cursor, table_name):
# 查询 sqlite_master 系统表,判断表是否存在
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name=?;", (table_name,))
result = cursor.fetchone()
# 如果结果不为空,表存在;否则表不存在
return result
def _get_messages_by_username(self, cursor, username: str,
time_range: Tuple[int | float | str | date, int | float | str | date] = None, ):
table_name = f'Msg_{hashlib.md5(username.encode("utf-8")).hexdigest()}'
if not self.table_exists(cursor, table_name):
return None
if time_range:
start_time, end_time = convert_to_timestamp(time_range)
sql = f'''
select {MessageDB.columns}
from {table_name} as msg
join Name2Id on msg.real_sender_id = Name2Id.rowid
{'where create_time>' + str(start_time) + ' AND create_time<' + str(end_time) if time_range else ''}
order by sort_seq
'''
cursor.execute(sql)
result = cursor.fetchall()
if result:
return result
else:
return None
def get_messages_by_username(self, username: str,
time_range: Tuple[int | float | str | date, int | float | str | date] = None, ):
with concurrent.futures.ThreadPoolExecutor() as executor:
# 创建一个任务列表
futures = [
executor.submit(self._get_messages_by_username, db.cursor(), username, time_range)
for db in self.DB
]
# 等待所有任务完成,并获取结果
results = []
for future in concurrent.futures.as_completed(futures):
r1 = future.result()
if r1:
# results.append(future.result())
results.extend(r1)
return results
results = []
# for db in self.DB:
# cursor = db.cursor()
# yield self._get_messages_by_num(cursor, username, start_sort_seq, msg_num)
lock = threading.Lock() # 锁,用于确保线程安全地写入 results
def task(db):
"""
每个线程执行的任务,获取某个数据库实例中的查询结果。
"""
cursor = db.cursor()
try:
data = self._get_messages_by_username(cursor, username, time_range)
with lock: # 确保对 results 的操作是线程安全的
results.append(data)
finally:
cursor.close()
# 使用线程池
with ThreadPoolExecutor(max_workers=len(self.DB)) as executor:
executor.map(task, self.DB)
self.commit()
return results
def _get_messages_by_num(self, cursor, username, start_sort_seq, msg_num):
table_name = f'Msg_{hashlib.md5(username.encode("utf-8")).hexdigest()}'
if not self.table_exists(cursor, table_name):
return []
sql = f'''
select {MessageDB.columns}
from {table_name} as msg
join Name2Id on msg.real_sender_id = Name2Id.rowid
where sort_seq < ?
order by sort_seq desc
limit ?
'''
cursor.execute(sql, [start_sort_seq, msg_num])
result = cursor.fetchall()
if result:
return result
else:
return []
def get_message_by_server_id(self, username, server_id):
"""
获取小于start_sort_seq的msg_num个消息
@param username:
@param server_id:
@return: messages, 最后一条消息的start_sort_seq
"""
table_name = f'Msg_{hashlib.md5(username.encode("utf-8")).hexdigest()}'
sql = f'''
select {MessageDB.columns}
from {table_name} as msg
join Name2Id on msg.real_sender_id = Name2Id.rowid
where server_id = ?
'''
for db in self.DB:
cursor = db.cursor()
if not self.table_exists(cursor, table_name):
continue
cursor.execute(sql, [server_id])
result = cursor.fetchone()
if result:
return result
def get_messages_by_num(self, username, start_sort_seq, msg_num=20):
results = []
# for db in self.DB:
# cursor = db.cursor()
# yield self._get_messages_by_num(cursor, username, start_sort_seq, msg_num)
lock = threading.Lock() # 锁,用于确保线程安全地写入 results
def task(db):
"""
每个线程执行的任务,获取某个数据库实例中的查询结果。
"""
cursor = db.cursor()
try:
data = self._get_messages_by_num(cursor, username, start_sort_seq, msg_num)
with lock: # 确保对 results 的操作是线程安全的
results.append(data)
finally:
cursor.close()
# 使用线程池
with ThreadPoolExecutor(max_workers=len(self.DB)) as executor:
executor.map(task, self.DB)
self.commit()
return results
def _get_messages_calendar(self, cursor, username):
"""
获取某个人的聊天日历列表
@param username_:
@return:
"""
table_name = f'Msg_{hashlib.md5(username.encode("utf-8")).hexdigest()}'
if not self.table_exists(cursor, table_name):
return None
sql = f'''SELECT DISTINCT strftime('%Y-%m-%d',create_time,'unixepoch','localtime') AS date
from {table_name} as msg
ORDER BY date desc;
'''
cursor.execute(sql)
result = cursor.fetchall()
return (data[0] for data in result)
def get_messages_calendar(self, username):
res = []
for db in self.DB:
r1 = self._get_messages_calendar(db.cursor(), username)
if r1:
res.extend(r1)
res.sort()
return res
def _get_messages_by_type(self, cursor, username: str, type_: MessageType,
time_range: Tuple[int | float | str | date, int | float | str | date] = None, ):
table_name = f'Msg_{hashlib.md5(username.encode("utf-8")).hexdigest()}'
if not self.table_exists(cursor, table_name):
return None
if time_range:
start_time, end_time = convert_to_timestamp(time_range)
local_type = get_local_type(type_)
sql = f'''
select {MessageDB.columns}
from {table_name} as msg
join Name2Id on msg.real_sender_id = Name2Id.rowid
where local_type=? {'and create_time>' + str(start_time) + ' AND create_time<' + str(end_time) if time_range else ''}
order by sort_seq
'''
cursor.execute(sql, [local_type])
result = cursor.fetchall()
if result:
return result
else:
return None
def get_messages_by_type(self, username: str, type_: MessageType,
time_range: Tuple[int | float | str | date, int | float | str | date] = None, ):
with concurrent.futures.ThreadPoolExecutor() as executor:
# 创建一个任务列表
futures = [
executor.submit(self._get_messages_by_type, db.cursor(), username, type_, time_range)
for db in self.DB
]
# 等待所有任务完成,并获取结果
results = []
for future in concurrent.futures.as_completed(futures):
r1 = future.result()
if r1:
# results.append(future.result())
results.extend(r1)
return results
def merge(self, db_file_name):
def task_(db_path, cursor, db):
"""
每个线程执行的任务,获取某个数据库实例中的查询结果。
"""
increase_data(db_path, cursor, db, 'Name2Id', 'user_name')
increase_update_data(db_path, cursor, db, 'TimeStamp', 'timestamp')
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
result = cursor.fetchall()
# print(result)
if result:
for row in result:
table_name = row[0]
if table_name.startswith('Msg'):
increase_data(db_path, cursor, db, table_name, 'server_id', exclude_first_column=True)
tasks = []
for i in range(100):
db_path = db_file_name.replace('0', f'{i}')
if os.path.exists(db_path):
# print('初始化数据库:', db_path)
file_name = os.path.basename(db_path)
if file_name in self.db_file_name:
index = self.db_file_name.index(file_name)
db = self.DB[index]
cursor = db.cursor()
task_(db_path, cursor, db)
tasks.append([db_path, cursor, db])
else:
shutil.copy(db_path, os.path.join(self.db_dir, 'Multi', file_name))
# print(tasks)
# 使用线程池 (没有加快合并速度)
# with ThreadPoolExecutor(max_workers=len(tasks)) as executor:
# executor.map(lambda args: task_(*args), tasks)
self.commit()
print(len(tasks))
if __name__ == '__main__':
pass
+51
View File
@@ -0,0 +1,51 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2024/12/7 0:04
@Author : SiYuan
@Email : 863909694@qq.com
@File : MemoTrace-session.py
@Description :
"""
import os
import traceback
from wxManager.merge import increase_update_data
from wxManager.model.db_model import DataBaseBase
class SessionDB(DataBaseBase):
def get_session(self):
if not self.open_flag:
return []
sql = '''
select username, type, unread_count, unread_first_msg_srv_id,last_timestamp, summary,last_msg_type,last_msg_sub_type,strftime('%Y/%m/%d', last_timestamp, 'unixepoch','localtime') AS strTime,last_sender_display_name,last_msg_sender
from SessionTable
order by sort_timestamp desc
'''
self.cursor.execute(sql)
result = self.cursor.fetchall()
self.commit()
if result:
return result
else:
return []
def merge(self, db_path):
if not (os.path.exists(db_path) or os.path.isfile(db_path)):
print(f'{db_path} 不存在')
return
try:
# 获取列名
increase_update_data(db_path, self.cursor, self.DB, 'SessionTable', 'username')
except:
print(f"数据库操作错误: {traceback.format_exc()}")
self.DB.rollback()
if __name__ == '__main__':
cd = SessionDB('session/session.db')
cd.init_database(r'E:\Project\Python\MemoTrace\app\DataBase\Msg\wxid_27hqbq7vx5hf22\db_storage')
r = cd.get_session()
print(r)