mirror of
https://github.com/teest114514/chatlog_alpha.git
synced 2026-03-29 06:19:09 +08:00
Optimize image access with MD5-path cache and fallback
Introduces an MD5-to-path cache for media files, improving image access reliability. The /image/{md5} endpoint now uses a three-level fallback: hardlink table, cache-based path with suffix matching, and directory recursion. This addresses issues with image retrieval, especially for WeChat v4 packed_info_data cases, and updates the README with details of the new mechanism.
This commit is contained in:
7
.claude/settings.local.json
Normal file
7
.claude/settings.local.json
Normal file
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"permissions": {
|
||||
"allow": [
|
||||
"Bash(go build:*)"
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -15,6 +15,13 @@
|
||||
|
||||
## 更新日志
|
||||
|
||||
### 2026年1月13日
|
||||
- **图片访问优化**:
|
||||
- **MD5-Path 缓存机制**:新增 md5 到 path 的映射缓存,当调用 `/api/v1/chatlog` 接口时自动构建图片文件的 md5 与路径映射关系。
|
||||
- **智能文件查找**:优化 `/image/{md5}` 接口,当通过 hardlink 表无法找到图片时,会自动使用缓存中的 path 并按优先级尝试 `.dat`(原图)、`_h.dat`(高清图)、`_t.dat`(缩略图)等后缀进行文件匹配。
|
||||
- **三级回退逻辑**:实现了三层查找机制:hardlink 表查询 → 缓存路径匹配 → 目录递归搜索,确保图片访问的高成功率。
|
||||
- **适用场景**:特别适用于微信 v4 版本中 `packed_info_data` 提供的路径信息,解决部分图片无法通过 md5 直接访问的问题。
|
||||
|
||||
### 2025年12月30日
|
||||
- **ChatLab 标准化支持**:
|
||||
- **全量适配**:消息检索功能现已完全适配 [ChatLab 标准化格式 (v0.0.1)](./chatlab.md),支持导出包含完整元数据、成员信息及标准化消息类型的 JSON 文件。
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"embed"
|
||||
"encoding/csv"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/fs"
|
||||
"net/http"
|
||||
"os"
|
||||
@@ -131,6 +132,9 @@ func (s *Service) handleChatlog(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// Populate md5->path cache for media files
|
||||
s.populateMD5PathCache(messages)
|
||||
|
||||
switch strings.ToLower(q.Format) {
|
||||
case "chatlab":
|
||||
talkerName := q.Talker
|
||||
@@ -464,6 +468,28 @@ func (s *Service) handleMedia(c *gin.Context, _type string) {
|
||||
}
|
||||
media, err := s.db.GetMedia(_type, k)
|
||||
if err != nil {
|
||||
// Fallback 1: try to find path from md5->path cache
|
||||
if cachedPath := s.getMD5FromCache(k); cachedPath != "" {
|
||||
// Try to find the actual file with different suffixes
|
||||
if absolutePath := s.tryFindFileWithSuffixes(cachedPath); absolutePath != "" {
|
||||
if _type == "image" {
|
||||
s.handleImageFile(c, absolutePath)
|
||||
return
|
||||
}
|
||||
c.Redirect(http.StatusFound, "/data/"+cachedPath)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback 2: try to find file by md5 in msg/attach directory
|
||||
if _type == "image" && !strings.Contains(k, "/") {
|
||||
if foundPath := s.findImageByMD5(k); foundPath != "" {
|
||||
// Process the found image file
|
||||
s.handleImageFile(c, foundPath)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
_err = err
|
||||
continue
|
||||
}
|
||||
@@ -476,59 +502,8 @@ func (s *Service) handleMedia(c *gin.Context, _type string) {
|
||||
s.HandleVoice(c, media.Data)
|
||||
return
|
||||
case "image":
|
||||
// If it's not a .dat file, redirect to the data handler as before.
|
||||
if !strings.HasSuffix(strings.ToLower(media.Path), ".dat") {
|
||||
c.Redirect(http.StatusFound, "/data/"+media.Path)
|
||||
return
|
||||
}
|
||||
|
||||
// It is a .dat file. Decrypt, save, and redirect to the new file.
|
||||
absolutePath := filepath.Join(s.conf.GetDataDir(), media.Path)
|
||||
|
||||
// Build the potential output path to check if it exists
|
||||
var newRelativePath string
|
||||
outputPath := strings.TrimSuffix(absolutePath, filepath.Ext(absolutePath))
|
||||
relativePathBase := strings.TrimSuffix(media.Path, filepath.Ext(media.Path))
|
||||
|
||||
// Check if a converted file already exists
|
||||
for _, ext := range []string{".jpg", ".png", ".gif", ".jpeg", ".bmp", ".mp4"} {
|
||||
if _, err := os.Stat(outputPath + ext); err == nil {
|
||||
newRelativePath = relativePathBase + ext
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// If a converted file is found, redirect to it immediately
|
||||
if newRelativePath != "" {
|
||||
c.Redirect(http.StatusFound, "/data/"+newRelativePath)
|
||||
return
|
||||
}
|
||||
|
||||
// If not found, decrypt and save it
|
||||
b, err := os.ReadFile(absolutePath)
|
||||
if err != nil {
|
||||
errors.Err(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
out, ext, err := dat2img.Dat2Image(b)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{
|
||||
"error": "Failed to parse .dat file",
|
||||
"reason": err.Error(),
|
||||
"path": absolutePath,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
// Save the decrypted file. s.saveDecryptedFile handles the existence check.
|
||||
s.saveDecryptedFile(absolutePath, out, ext)
|
||||
|
||||
// Build the new relative path and redirect
|
||||
newRelativePath = relativePathBase + "." + ext
|
||||
c.Redirect(http.StatusFound, "/data/"+newRelativePath)
|
||||
s.handleImageFile(c, filepath.Join(s.conf.GetDataDir(), media.Path))
|
||||
return
|
||||
|
||||
default:
|
||||
// For other types, keep the old redirect logic
|
||||
c.Redirect(http.StatusFound, "/data/"+media.Path)
|
||||
@@ -564,6 +539,195 @@ func (s *Service) findPath(_type string, key string) (string, error) {
|
||||
return "", errors.ErrMediaNotFound
|
||||
}
|
||||
|
||||
// findImageByMD5 searches for an image file by MD5 in the msg/attach directory
|
||||
// It tries different suffixes: _h.dat, .dat, _t.dat
|
||||
func (s *Service) findImageByMD5(md5 string) string {
|
||||
dataDir := s.conf.GetDataDir()
|
||||
attachDir := filepath.Join(dataDir, "msg", "attach")
|
||||
|
||||
// Check if attach directory exists
|
||||
if _, err := os.Stat(attachDir); os.IsNotExist(err) {
|
||||
return ""
|
||||
}
|
||||
|
||||
var foundPath string
|
||||
|
||||
// Walk through the attach directory to find files matching the md5
|
||||
err := filepath.Walk(attachDir, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
// Skip directories we can't access
|
||||
if os.IsPermission(err) {
|
||||
return filepath.SkipDir
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Stop if we already found a file
|
||||
if foundPath != "" {
|
||||
return io.EOF
|
||||
}
|
||||
|
||||
// Skip directories
|
||||
if info.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check if file name contains the md5
|
||||
baseName := strings.ToLower(filepath.Base(path))
|
||||
if !strings.Contains(baseName, strings.ToLower(md5)) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check if it's a .dat file
|
||||
if !strings.HasSuffix(baseName, ".dat") {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Try to read and verify the file
|
||||
if _, err := os.Stat(path); err == nil {
|
||||
foundPath = path
|
||||
return io.EOF
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
// If we found io.EOF, it means we found the file
|
||||
if err == io.EOF && foundPath != "" {
|
||||
return foundPath
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// getMD5FromCache retrieves path from md5->path cache
|
||||
func (s *Service) getMD5FromCache(md5 string) string {
|
||||
s.md5PathMu.RLock()
|
||||
defer s.md5PathMu.RUnlock()
|
||||
|
||||
if path, ok := s.md5PathCache[md5]; ok {
|
||||
log.Debug().Str("md5", md5).Str("path", path).Msg("Cache hit for md5")
|
||||
return path
|
||||
}
|
||||
|
||||
log.Debug().Str("md5", md5).Msg("Cache miss for md5")
|
||||
return ""
|
||||
}
|
||||
|
||||
// tryFindFileWithSuffixes tries to find a file with different suffixes
|
||||
// Priority: .dat (original) -> _h.dat (HD) -> _t.dat (thumbnail)
|
||||
func (s *Service) tryFindFileWithSuffixes(basePath string) string {
|
||||
dataDir := s.conf.GetDataDir()
|
||||
|
||||
// Try different suffixes with priority: original -> HD -> thumbnail
|
||||
suffixes := []string{".dat", "_h.dat", "_t.dat"}
|
||||
|
||||
for _, suffix := range suffixes {
|
||||
testPath := filepath.Join(dataDir, basePath+suffix)
|
||||
if _, err := os.Stat(testPath); err == nil {
|
||||
log.Debug().Str("path", testPath).Str("suffix", suffix).Msg("Found file with suffix")
|
||||
return testPath
|
||||
}
|
||||
}
|
||||
|
||||
// Try without any suffix (might already have extension)
|
||||
testPath := filepath.Join(dataDir, basePath)
|
||||
if _, err := os.Stat(testPath); err == nil {
|
||||
log.Debug().Str("path", testPath).Msg("Found file without suffix")
|
||||
return testPath
|
||||
}
|
||||
|
||||
log.Debug().Str("basePath", basePath).Msg("File not found with any suffix")
|
||||
return ""
|
||||
}
|
||||
|
||||
// populateMD5PathCache populates the md5->path cache from messages
|
||||
func (s *Service) populateMD5PathCache(messages []*model.Message) {
|
||||
s.md5PathMu.Lock()
|
||||
defer s.md5PathMu.Unlock()
|
||||
|
||||
for _, msg := range messages {
|
||||
if msg.Contents == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// Only cache for image, video, and file types
|
||||
if msg.Type != model.MessageTypeImage &&
|
||||
msg.Type != model.MessageTypeVideo &&
|
||||
msg.Type != model.MessageTypeVoice {
|
||||
continue
|
||||
}
|
||||
|
||||
// Get md5 from contents
|
||||
md5Value, md5Ok := msg.Contents["md5"].(string)
|
||||
if !md5Ok || md5Value == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// Get path from contents
|
||||
pathValue, pathOk := msg.Contents["path"].(string)
|
||||
if pathOk && pathValue != "" {
|
||||
s.md5PathCache[md5Value] = pathValue
|
||||
log.Debug().Str("md5", md5Value).Str("path", pathValue).Msg("Cached md5->path mapping")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// handleImageFile processes an image file, handling decryption if it's a .dat file
|
||||
func (s *Service) handleImageFile(c *gin.Context, absolutePath string) {
|
||||
// If it's not a .dat file, redirect to the data handler
|
||||
if !strings.HasSuffix(strings.ToLower(absolutePath), ".dat") {
|
||||
relativePath := strings.TrimPrefix(absolutePath, s.conf.GetDataDir())
|
||||
relativePath = strings.TrimPrefix(relativePath, string(filepath.Separator))
|
||||
c.Redirect(http.StatusFound, "/data/"+relativePath)
|
||||
return
|
||||
}
|
||||
|
||||
// Check if already converted
|
||||
outputPath := strings.TrimSuffix(absolutePath, filepath.Ext(absolutePath))
|
||||
var newRelativePath string
|
||||
relativePathBase := strings.TrimPrefix(outputPath, s.conf.GetDataDir())
|
||||
relativePathBase = strings.TrimPrefix(relativePathBase, string(filepath.Separator))
|
||||
|
||||
// Check if a converted file already exists
|
||||
for _, ext := range []string{".jpg", ".png", ".gif", ".jpeg", ".bmp"} {
|
||||
if _, err := os.Stat(outputPath + ext); err == nil {
|
||||
newRelativePath = relativePathBase + ext
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// If a converted file is found, redirect to it immediately
|
||||
if newRelativePath != "" {
|
||||
c.Redirect(http.StatusFound, "/data/"+newRelativePath)
|
||||
return
|
||||
}
|
||||
|
||||
// Decrypt and convert the .dat file
|
||||
b, err := os.ReadFile(absolutePath)
|
||||
if err != nil {
|
||||
errors.Err(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
out, ext, err := dat2img.Dat2Image(b)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{
|
||||
"error": "Failed to parse .dat file",
|
||||
"reason": err.Error(),
|
||||
"path": absolutePath,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
// Save the decrypted file
|
||||
s.saveDecryptedFile(absolutePath, out, ext)
|
||||
|
||||
// Build the new relative path and redirect
|
||||
newRelativePath = relativePathBase + "." + ext
|
||||
c.Redirect(http.StatusFound, "/data/"+newRelativePath)
|
||||
}
|
||||
|
||||
func (s *Service) handleMediaData(c *gin.Context) {
|
||||
relativePath := filepath.Clean(c.Param("path"))
|
||||
|
||||
|
||||
@@ -40,6 +40,10 @@ type Service struct {
|
||||
mcpSubscriptions map[string]*Subscription
|
||||
mcpSubMu sync.RWMutex
|
||||
|
||||
// md5 到 path 的缓存(用于图片、视频等媒体文件)
|
||||
md5PathCache map[string]string
|
||||
md5PathMu sync.RWMutex
|
||||
|
||||
lastPushTime time.Time
|
||||
lastPushTalker string
|
||||
|
||||
@@ -74,6 +78,7 @@ func NewService(conf Config, db *database.Service) *Service {
|
||||
db: db,
|
||||
router: router,
|
||||
mcpSubscriptions: make(map[string]*Subscription),
|
||||
md5PathCache: make(map[string]string),
|
||||
subscriptionPath: filepath.Join(conf.GetDataDir(), "subscriptions.json"),
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user