diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..b55e75b --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,7 @@ +{ + "permissions": { + "allow": [ + "Bash(go build:*)" + ] + } +} diff --git a/README.md b/README.md index b89b937..369f12b 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,13 @@ ## 更新日志 +### 2026年1月13日 +- **图片访问优化**: + - **MD5-Path 缓存机制**:新增 md5 到 path 的映射缓存,当调用 `/api/v1/chatlog` 接口时自动构建图片文件的 md5 与路径映射关系。 + - **智能文件查找**:优化 `/image/{md5}` 接口,当通过 hardlink 表无法找到图片时,会自动使用缓存中的 path 并按优先级尝试 `.dat`(原图)、`_h.dat`(高清图)、`_t.dat`(缩略图)等后缀进行文件匹配。 + - **递进回退逻辑**:实现了三层查找机制 - hardlink 表查询 → 缓存路径匹配 → 目录递归搜索,确保图片访问的高成功率。 + - **适用场景**:特别适用于微信 v4 版本中 `packed_info_data` 提供的路径信息,解决部分图片无法通过 md5 直接访问的问题。 + ### 2025年12月30日 - **ChatLab 标准化支持**: - **全量适配**:消息检索功能现已完全适配 [ChatLab 标准化格式 (v0.0.1)](./chatlab.md),支持导出包含完整元数据、成员信息及标准化消息类型的 JSON 文件。 diff --git a/internal/chatlog/http/route.go b/internal/chatlog/http/route.go index af53aa3..0762878 100644 --- a/internal/chatlog/http/route.go +++ b/internal/chatlog/http/route.go @@ -4,6 +4,7 @@ import ( "embed" "encoding/csv" "fmt" + "io" "io/fs" "net/http" "os" @@ -131,6 +132,9 @@ func (s *Service) handleChatlog(c *gin.Context) { return } + // Populate md5->path cache for media files + s.populateMD5PathCache(messages) + switch strings.ToLower(q.Format) { case "chatlab": talkerName := q.Talker @@ -464,6 +468,28 @@ func (s *Service) handleMedia(c *gin.Context, _type string) { } media, err := s.db.GetMedia(_type, k) if err != nil { + // Fallback 1: try to find path from md5->path cache + if cachedPath := s.getMD5FromCache(k); cachedPath != "" { + // Try to find the actual file with different suffixes + if absolutePath := s.tryFindFileWithSuffixes(cachedPath); absolutePath != "" { + if _type == "image" { + s.handleImageFile(c, absolutePath) + return + } + c.Redirect(http.StatusFound, "/data/"+cachedPath) + return + } + } + + // Fallback 2: try to find file by md5 in 
msg/attach directory + if _type == "image" && !strings.Contains(k, "/") { + if foundPath := s.findImageByMD5(k); foundPath != "" { + // Process the found image file + s.handleImageFile(c, foundPath) + return + } + } + _err = err continue } @@ -476,59 +502,8 @@ func (s *Service) handleMedia(c *gin.Context, _type string) { s.HandleVoice(c, media.Data) return case "image": - // If it's not a .dat file, redirect to the data handler as before. - if !strings.HasSuffix(strings.ToLower(media.Path), ".dat") { - c.Redirect(http.StatusFound, "/data/"+media.Path) - return - } - - // It is a .dat file. Decrypt, save, and redirect to the new file. - absolutePath := filepath.Join(s.conf.GetDataDir(), media.Path) - - // Build the potential output path to check if it exists - var newRelativePath string - outputPath := strings.TrimSuffix(absolutePath, filepath.Ext(absolutePath)) - relativePathBase := strings.TrimSuffix(media.Path, filepath.Ext(media.Path)) - - // Check if a converted file already exists - for _, ext := range []string{".jpg", ".png", ".gif", ".jpeg", ".bmp", ".mp4"} { - if _, err := os.Stat(outputPath + ext); err == nil { - newRelativePath = relativePathBase + ext - break - } - } - - // If a converted file is found, redirect to it immediately - if newRelativePath != "" { - c.Redirect(http.StatusFound, "/data/"+newRelativePath) - return - } - - // If not found, decrypt and save it - b, err := os.ReadFile(absolutePath) - if err != nil { - errors.Err(c, err) - return - } - - out, ext, err := dat2img.Dat2Image(b) - if err != nil { - c.JSON(http.StatusInternalServerError, gin.H{ - "error": "Failed to parse .dat file", - "reason": err.Error(), - "path": absolutePath, - }) - return - } - - // Save the decrypted file. s.saveDecryptedFile handles the existence check. - s.saveDecryptedFile(absolutePath, out, ext) - - // Build the new relative path and redirect - newRelativePath = relativePathBase + "." 
+ ext - c.Redirect(http.StatusFound, "/data/"+newRelativePath) + s.handleImageFile(c, filepath.Join(s.conf.GetDataDir(), media.Path)) return - default: // For other types, keep the old redirect logic c.Redirect(http.StatusFound, "/data/"+media.Path) @@ -564,6 +539,195 @@ func (s *Service) findPath(_type string, key string) (string, error) { return "", errors.ErrMediaNotFound } +// findImageByMD5 searches for an image file by MD5 in the msg/attach directory +// It tries different suffixes: _h.dat, .dat, _t.dat +func (s *Service) findImageByMD5(md5 string) string { + dataDir := s.conf.GetDataDir() + attachDir := filepath.Join(dataDir, "msg", "attach") + + // Check if attach directory exists + if _, err := os.Stat(attachDir); os.IsNotExist(err) { + return "" + } + + var foundPath string + + // Walk through the attach directory to find files matching the md5 + err := filepath.Walk(attachDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + // Skip directories we can't access + if os.IsPermission(err) { + return filepath.SkipDir + } + return err + } + + // Stop if we already found a file + if foundPath != "" { + return io.EOF + } + + // Skip directories + if info.IsDir() { + return nil + } + + // Check if file name contains the md5 + baseName := strings.ToLower(filepath.Base(path)) + if !strings.Contains(baseName, strings.ToLower(md5)) { + return nil + } + + // Check if it's a .dat file + if !strings.HasSuffix(baseName, ".dat") { + return nil + } + + // Try to read and verify the file + if _, err := os.Stat(path); err == nil { + foundPath = path + return io.EOF + } + + return nil + }) + + // If we found io.EOF, it means we found the file + if err == io.EOF && foundPath != "" { + return foundPath + } + + return "" +} + +// getMD5FromCache retrieves path from md5->path cache +func (s *Service) getMD5FromCache(md5 string) string { + s.md5PathMu.RLock() + defer s.md5PathMu.RUnlock() + + if path, ok := s.md5PathCache[md5]; ok { + 
log.Debug().Str("md5", md5).Str("path", path).Msg("Cache hit for md5") + return path + } + + log.Debug().Str("md5", md5).Msg("Cache miss for md5") + return "" +} + +// tryFindFileWithSuffixes tries to find a file with different suffixes +// Priority: .dat (original) -> _h.dat (HD) -> _t.dat (thumbnail) +func (s *Service) tryFindFileWithSuffixes(basePath string) string { + dataDir := s.conf.GetDataDir() + + // Try different suffixes with priority: original -> HD -> thumbnail + suffixes := []string{".dat", "_h.dat", "_t.dat"} + + for _, suffix := range suffixes { + testPath := filepath.Join(dataDir, basePath+suffix) + if _, err := os.Stat(testPath); err == nil { + log.Debug().Str("path", testPath).Str("suffix", suffix).Msg("Found file with suffix") + return testPath + } + } + + // Try without any suffix (might already have extension) + testPath := filepath.Join(dataDir, basePath) + if _, err := os.Stat(testPath); err == nil { + log.Debug().Str("path", testPath).Msg("Found file without suffix") + return testPath + } + + log.Debug().Str("basePath", basePath).Msg("File not found with any suffix") + return "" +} + +// populateMD5PathCache populates the md5->path cache from messages +func (s *Service) populateMD5PathCache(messages []*model.Message) { + s.md5PathMu.Lock() + defer s.md5PathMu.Unlock() + + for _, msg := range messages { + if msg.Contents == nil { + continue + } + + // Only cache for image, video, and file types + if msg.Type != model.MessageTypeImage && + msg.Type != model.MessageTypeVideo && + msg.Type != model.MessageTypeVoice { + continue + } + + // Get md5 from contents + md5Value, md5Ok := msg.Contents["md5"].(string) + if !md5Ok || md5Value == "" { + continue + } + + // Get path from contents + pathValue, pathOk := msg.Contents["path"].(string) + if pathOk && pathValue != "" { + s.md5PathCache[md5Value] = pathValue + log.Debug().Str("md5", md5Value).Str("path", pathValue).Msg("Cached md5->path mapping") + } + } +} + +// handleImageFile processes an 
image file, handling decryption if it's a .dat file +func (s *Service) handleImageFile(c *gin.Context, absolutePath string) { + // If it's not a .dat file, redirect to the data handler + if !strings.HasSuffix(strings.ToLower(absolutePath), ".dat") { + relativePath := strings.TrimPrefix(absolutePath, s.conf.GetDataDir()) + relativePath = strings.TrimPrefix(relativePath, string(filepath.Separator)) + c.Redirect(http.StatusFound, "/data/"+relativePath) + return + } + + // Check if already converted + outputPath := strings.TrimSuffix(absolutePath, filepath.Ext(absolutePath)) + var newRelativePath string + relativePathBase := strings.TrimPrefix(outputPath, s.conf.GetDataDir()) + relativePathBase = strings.TrimPrefix(relativePathBase, string(filepath.Separator)) + + // Check if a converted file already exists + for _, ext := range []string{".jpg", ".png", ".gif", ".jpeg", ".bmp"} { + if _, err := os.Stat(outputPath + ext); err == nil { + newRelativePath = relativePathBase + ext + break + } + } + + // If a converted file is found, redirect to it immediately + if newRelativePath != "" { + c.Redirect(http.StatusFound, "/data/"+newRelativePath) + return + } + + // Decrypt and convert the .dat file + b, err := os.ReadFile(absolutePath) + if err != nil { + errors.Err(c, err) + return + } + + out, ext, err := dat2img.Dat2Image(b) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{ + "error": "Failed to parse .dat file", + "reason": err.Error(), + "path": absolutePath, + }) + return + } + + // Save the decrypted file + s.saveDecryptedFile(absolutePath, out, ext) + + // Build the new relative path and redirect + newRelativePath = relativePathBase + "." 
+ ext + c.Redirect(http.StatusFound, "/data/"+newRelativePath) +} + func (s *Service) handleMediaData(c *gin.Context) { relativePath := filepath.Clean(c.Param("path")) diff --git a/internal/chatlog/http/service.go b/internal/chatlog/http/service.go index 6009cc0..d31640c 100644 --- a/internal/chatlog/http/service.go +++ b/internal/chatlog/http/service.go @@ -40,6 +40,10 @@ type Service struct { mcpSubscriptions map[string]*Subscription mcpSubMu sync.RWMutex + // md5 到 path 的缓存(用于图片、视频等媒体文件) + md5PathCache map[string]string + md5PathMu sync.RWMutex + lastPushTime time.Time lastPushTalker string @@ -74,6 +78,7 @@ func NewService(conf Config, db *database.Service) *Service { db: db, router: router, mcpSubscriptions: make(map[string]*Subscription), + md5PathCache: make(map[string]string), subscriptionPath: filepath.Join(conf.GetDataDir(), "subscriptions.json"), }