feat: 实现语音转文字并支持流式输出；

fix: 修复了语音解密失败的问题
2026-03-23 23:01:21 +08:00 · 2026-01-17 14:16:54 +08:00
parent 650de55202
commit e8babd48b6
33 changed files with 1713 additions and 570 deletions
--- a/electron/services/chatService.ts
+++ b/electron/services/chatService.ts
@@ -7,11 +7,7 @@ import * as http from 'http'
 import * as fzstd from 'fzstd'
 import * as crypto from 'crypto'
 import Database from 'better-sqlite3'
-import { execFile } from 'child_process'
-import { promisify } from 'util'
 import { app } from 'electron'
-
-const execFileAsync = promisify(execFile)
 import { ConfigService } from './config'
 import { wcdbService } from './wcdbService'
 import { MessageCacheService } from './messageCacheService'
@@ -2149,7 +2145,107 @@ class ChatService {
    }
  }

-  async getVoiceData(sessionId: string, msgId: string): Promise<{ success: boolean; data?: string; error?: string }> {
+  /**
+   * getVoiceData (优化的 C++ 实现 + 文件缓存)
+   */
+  async getVoiceData(sessionId: string, msgId: string, createTime?: number, serverId?: string | number): Promise<{ success: boolean; data?: string; error?: string }> {
+
+    try {
+      const localId = parseInt(msgId, 10)
+      if (isNaN(localId)) {
+        return { success: false, error: '无效的消息ID' }
+      }
+
+      // 检查文件缓存
+      const cacheKey = this.getVoiceCacheKey(sessionId, msgId)
+      const cachedFile = this.getVoiceCacheFilePath(cacheKey)
+      if (existsSync(cachedFile)) {
+        try {
+          const wavData = readFileSync(cachedFile)
+          console.info('[ChatService][Voice] 使用缓存文件:', cachedFile)
+          return { success: true, data: wavData.toString('base64') }
+        } catch (e) {
+          console.error('[ChatService][Voice] 读取缓存失败:', e)
+          // 继续重新解密
+        }
+      }
+
+      // 1. 确定 createTime 和 svrId
+      let msgCreateTime = createTime
+      let msgSvrId: string | number = serverId || 0
+
+      // 如果提供了传来的参数，验证其有效性
+      if (!msgCreateTime || msgCreateTime === 0) {
+        const msgResult = await this.getMessageByLocalId(sessionId, localId)
+        if (msgResult.success && msgResult.message) {
+          const msg = msgResult.message as any
+          msgCreateTime = msg.createTime || msg.create_time
+          // 尝试获取各种可能的 server id 列名 (只有在没有传入 serverId 时才查找)
+          if (!msgSvrId || msgSvrId === 0) {
+            msgSvrId = msg.serverId || msg.svr_id || msg.msg_svr_id || msg.message_id || 0
+          }
+        }
+      }
+
+      if (!msgCreateTime) {
+        return { success: false, error: '未找到消息时间戳' }
+      }
+
+      // 2. 构建查找候选 (sessionId, myWxid)
+      const candidates: string[] = []
+      if (sessionId) candidates.push(sessionId)
+      const myWxid = this.configService.get('myWxid') as string
+      if (myWxid && !candidates.includes(myWxid)) {
+        candidates.push(myWxid)
+      }
+
+
+
+      // 3. 调用 C++ 接口获取语音 (Hex)
+      const voiceRes = await wcdbService.getVoiceData(sessionId, msgCreateTime, candidates, msgSvrId)
+      if (!voiceRes.success || !voiceRes.hex) {
+        return { success: false, error: voiceRes.error || '未找到语音数据' }
+      }
+
+
+
+      // 4. Hex 转 Buffer (Silk)
+      const silkData = Buffer.from(voiceRes.hex, 'hex')
+
+      // 5. 使用 silk-wasm 解码
+      try {
+        const pcmData = await this.decodeSilkToPcm(silkData, 24000)
+        if (!pcmData) {
+          return { success: false, error: 'Silk 解码失败' }
+        }
+
+        // PCM -> WAV
+        const wavData = this.createWavBuffer(pcmData, 24000)
+
+        // 保存到文件缓存
+        try {
+          this.saveVoiceCache(cacheKey, wavData)
+          console.info('[ChatService][Voice] 已保存缓存:', cachedFile)
+        } catch (e) {
+          console.error('[ChatService][Voice] 保存缓存失败:', e)
+          // 不影响返回
+        }
+
+        // 缓存 WAV 数据 (内存缓存)
+        this.cacheVoiceWav(cacheKey, wavData)
+
+        return { success: true, data: wavData.toString('base64') }
+      } catch (e) {
+        console.error('[ChatService][Voice] decoding error:', e)
+        return { success: false, error: '语音解码失败: ' + String(e) }
+      }
+    } catch (e) {
+      console.error('ChatService: getVoiceData 失败:', e)
+      return { success: false, error: String(e) }
+    }
+  }
+
+  async getVoiceData_Legacy(sessionId: string, msgId: string): Promise<{ success: boolean; data?: string; error?: string }> {
    try {
      const localId = parseInt(msgId, 10)
      const msgResult = await this.getMessageByLocalId(sessionId, localId)
@@ -2187,12 +2283,10 @@ class ChatService {
      for (const dbPath of (mediaDbs.data || [])) {
        const voiceTable = await this.resolveVoiceInfoTableName(dbPath)
        if (!voiceTable) {
-          console.warn('[ChatService][Voice] voice table not found', dbPath)
          continue
        }
        const columns = await this.resolveVoiceInfoColumns(dbPath, voiceTable)
        if (!columns) {
-          console.warn('[ChatService][Voice] voice columns not found', { dbPath, voiceTable })
          continue
        }
        for (const candidate of candidates) {
@@ -2233,52 +2327,44 @@ class ChatService {
          }
        }
        if (silkData) break
+
+        // 策略 3: 只使用 CreateTime (兜底)
+        if (!silkData && columns.createTimeColumn) {
+          const whereClause = `${columns.createTimeColumn} = ${msg.createTime}`
+          const sql = `SELECT ${columns.dataColumn} AS data FROM ${voiceTable} WHERE ${whereClause} LIMIT 1`
+          const result = await wcdbService.execQuery('media', dbPath, sql)
+          if (result.success && result.rows && result.rows.length > 0) {
+            const raw = result.rows[0]?.data
+            const decoded = this.decodeVoiceBlob(raw)
+            if (decoded && decoded.length > 0) {
+              console.info('[ChatService][Voice] hit by createTime only', { dbPath, voiceTable, whereClause, bytes: decoded.length })
+              silkData = decoded
+            }
+          }
+        }
+        if (silkData) break
      }

      if (!silkData) return { success: false, error: '未找到语音数据' }

-      // 4. 解码 Silk -> PCM -> WAV
-      const resourcesPath = app.isPackaged
-        ? join(process.resourcesPath, 'resources')
-        : join(app.getAppPath(), 'resources')
-      const decoderPath = join(resourcesPath, 'silk_v3_decoder.exe')
-
-      if (!existsSync(decoderPath)) {
-        return { success: false, error: '找不到语音解码器 (silk_v3_decoder.exe)' }
-      }
-      console.info('[ChatService][Voice] decoder path', decoderPath)
-
-      const tempDir = app.getPath('temp')
-      const silkFile = join(tempDir, `voice_${msgId}.silk`)
-      const pcmFile = join(tempDir, `voice_${msgId}.pcm`)
-
+      // 4. 使用 silk-wasm 解码
      try {
-        writeFileSync(silkFile, silkData)
-        // 执行解码: silk_v3_decoder.exe <silk> <pcm> -Fs_API 24000
-        console.info('[ChatService][Voice] executing decoder:', decoderPath, [silkFile, pcmFile])
-        const { stdout, stderr } = await execFileAsync(
-          decoderPath,
-          [silkFile, pcmFile, '-Fs_API', '24000'],
-          { cwd: dirname(decoderPath) }
-        )
-        if (stdout && stdout.trim()) console.info('[ChatService][Voice] decoder stdout:', stdout)
-        if (stderr && stderr.trim()) console.warn('[ChatService][Voice] decoder stderr:', stderr)
-
-        if (!existsSync(pcmFile)) {
-          return { success: false, error: '语音解码失败' }
+        const pcmData = await this.decodeSilkToPcm(silkData, 24000)
+        if (!pcmData) {
+          return { success: false, error: 'Silk 解码失败' }
        }

-        const pcmData = readFileSync(pcmFile)
-        const wavHeader = this.createWavHeader(pcmData.length, 24000, 1) // 微信语音通常 24kHz
-        const wavData = Buffer.concat([wavHeader, pcmData])
+        // PCM -> WAV
+        const wavData = this.createWavBuffer(pcmData, 24000)
+
+        // 缓存 WAV 数据 (内存缓存)
        const cacheKey = this.getVoiceCacheKey(sessionId, msgId)
        this.cacheVoiceWav(cacheKey, wavData)

        return { success: true, data: wavData.toString('base64') }
-      } finally {
-        // 清理临时文件
-        try { if (existsSync(silkFile)) unlinkSync(silkFile) } catch { }
-        try { if (existsSync(pcmFile)) unlinkSync(pcmFile) } catch { }
+      } catch (e) {
+        console.error('[ChatService][Voice] decoding error:', e)
+        return { success: false, error: '语音解码失败: ' + String(e) }
      }
    } catch (e) {
      console.error('ChatService: getVoiceData 失败:', e)
@@ -2286,7 +2372,69 @@ class ChatService {
    }
  }

-  async getVoiceTranscript(sessionId: string, msgId: string): Promise<{ success: boolean; transcript?: string; error?: string }> {
+
+
+  /**
+   * 解码 Silk 数据为 PCM (silk-wasm)
+   */
+  private async decodeSilkToPcm(silkData: Buffer, sampleRate: number): Promise<Buffer | null> {
+    try {
+      let wasmPath: string
+      if (app.isPackaged) {
+        wasmPath = join(process.resourcesPath, 'app.asar.unpacked', 'node_modules', 'silk-wasm', 'lib', 'silk.wasm')
+        if (!existsSync(wasmPath)) {
+          wasmPath = join(process.resourcesPath, 'node_modules', 'silk-wasm', 'lib', 'silk.wasm')
+        }
+      } else {
+        wasmPath = join(app.getAppPath(), 'node_modules', 'silk-wasm', 'lib', 'silk.wasm')
+      }
+
+      if (!existsSync(wasmPath)) {
+        console.error('[ChatService][Voice] silk.wasm not found at:', wasmPath)
+        return null
+      }
+
+      const silkWasm = require('silk-wasm')
+      if (!silkWasm || !silkWasm.decode) {
+        console.error('[ChatService][Voice] silk-wasm module invalid')
+        return null
+      }
+
+      const result = await silkWasm.decode(silkData, sampleRate)
+      return Buffer.from(result.data)
+    } catch (e) {
+      console.error('[ChatService][Voice] internal decode error:', e)
+      return null
+    }
+  }
+
+  /**
+   * 创建 WAV 文件 Buffer
+   */
+  private createWavBuffer(pcmData: Buffer, sampleRate: number = 24000, channels: number = 1): Buffer {
+    const pcmLength = pcmData.length
+    const header = Buffer.alloc(44)
+    header.write('RIFF', 0)
+    header.writeUInt32LE(36 + pcmLength, 4)
+    header.write('WAVE', 8)
+    header.write('fmt ', 12)
+    header.writeUInt32LE(16, 16)
+    header.writeUInt16LE(1, 20)
+    header.writeUInt16LE(channels, 22)
+    header.writeUInt32LE(sampleRate, 24)
+    header.writeUInt32LE(sampleRate * channels * 2, 28)
+    header.writeUInt16LE(channels * 2, 32)
+    header.writeUInt16LE(16, 34)
+    header.write('data', 36)
+    header.writeUInt32LE(pcmLength, 40)
+    return Buffer.concat([header, pcmData])
+  }
+
+  async getVoiceTranscript(
+    sessionId: string,
+    msgId: string,
+    onPartial?: (text: string) => void
+  ): Promise<{ success: boolean; transcript?: string; error?: string }> {
    const cacheKey = this.getVoiceCacheKey(sessionId, msgId)
    const cached = this.voiceTranscriptCache.get(cacheKey)
    if (cached) {
@@ -2302,14 +2450,25 @@ class ChatService {
      try {
        let wavData = this.voiceWavCache.get(cacheKey)
        if (!wavData) {
-          const voiceResult = await this.getVoiceData(sessionId, msgId)
+          // 获取消息详情以拿到 createTime 和 serverId
+          let cTime: number | undefined
+          let sId: string | number | undefined
+          const msgResult = await this.getMessageById(sessionId, parseInt(msgId, 10))
+          if (msgResult.success && msgResult.message) {
+            cTime = msgResult.message.createTime
+            sId = msgResult.message.serverId
+          }
+
+          const voiceResult = await this.getVoiceData(sessionId, msgId, cTime, sId)
          if (!voiceResult.success || !voiceResult.data) {
            return { success: false, error: voiceResult.error || '语音解码失败' }
          }
          wavData = Buffer.from(voiceResult.data, 'base64')
        }

-        const result = await voiceTranscribeService.transcribeWavBuffer(wavData)
+        const result = await voiceTranscribeService.transcribeWavBuffer(wavData, (text) => {
+          onPartial?.(text)
+        })
        if (result.success && result.transcript) {
          this.cacheVoiceTranscript(cacheKey, result.transcript)
        }
@@ -2325,26 +2484,10 @@ class ChatService {
    return task
  }

-  private createWavHeader(pcmLength: number, sampleRate: number = 24000, channels: number = 1): Buffer {
-    const header = Buffer.alloc(44)
-    header.write('RIFF', 0)
-    header.writeUInt32LE(36 + pcmLength, 4)
-    header.write('WAVE', 8)
-    header.write('fmt ', 12)
-    header.writeUInt32LE(16, 16)
-    header.writeUInt16LE(1, 20)
-    header.writeUInt16LE(channels, 22)
-    header.writeUInt32LE(sampleRate, 24)
-    header.writeUInt32LE(sampleRate * channels * 2, 28)
-    header.writeUInt16LE(channels * 2, 32)
-    header.writeUInt16LE(16, 34)
-    header.write('data', 36)
-    header.writeUInt32LE(pcmLength, 40)
-    return header
-  }
+

  private getVoiceCacheKey(sessionId: string, msgId: string): string {
-    return `${sessionId}:${msgId}`
+    return `${sessionId}_${msgId}`
  }

  private cacheVoiceWav(cacheKey: string, wavData: Buffer): void {
@@ -2355,6 +2498,32 @@ class ChatService {
    }
  }

+  /**
+   * 获取语音缓存文件路径
+   */
+  private getVoiceCacheFilePath(cacheKey: string): string {
+    const cachePath = this.configService.get('cachePath') as string | undefined
+    let baseDir: string
+    if (cachePath && cachePath.trim()) {
+      baseDir = join(cachePath, 'Voices')
+    } else {
+      const documentsPath = app.getPath('documents')
+      baseDir = join(documentsPath, 'WeFlow', 'Voices')
+    }
+    if (!existsSync(baseDir)) {
+      mkdirSync(baseDir, { recursive: true })
+    }
+    return join(baseDir, `${cacheKey}.wav`)
+  }
+
+  /**
+   * 保存语音到文件缓存
+   */
+  private saveVoiceCache(cacheKey: string, wavData: Buffer): void {
+    const filePath = this.getVoiceCacheFilePath(cacheKey)
+    writeFileSync(filePath, wavData)
+  }
+
  private cacheVoiceTranscript(cacheKey: string, transcript: string): void {
    this.voiceTranscriptCache.set(cacheKey, transcript)
    if (this.voiceTranscriptCache.size > this.voiceCacheMaxEntries) {