feat: 实现语音转文字并支持流式输出;

fix: 修复了语音解密失败的问题
This commit is contained in:
cc
2026-01-17 14:16:54 +08:00
parent 650de55202
commit e8babd48b6
33 changed files with 1713 additions and 570 deletions

View File

@@ -7,11 +7,7 @@ import * as http from 'http'
import * as fzstd from 'fzstd'
import * as crypto from 'crypto'
import Database from 'better-sqlite3'
import { execFile } from 'child_process'
import { promisify } from 'util'
import { app } from 'electron'
const execFileAsync = promisify(execFile)
import { ConfigService } from './config'
import { wcdbService } from './wcdbService'
import { MessageCacheService } from './messageCacheService'
@@ -2149,7 +2145,107 @@ class ChatService {
}
}
async getVoiceData(sessionId: string, msgId: string): Promise<{ success: boolean; data?: string; error?: string }> {
/**
 * Returns a voice message as a base64-encoded WAV.
 *
 * Lookup order: the on-disk WAV cache first, then the native voice query on
 * wcdbService followed by Silk -> PCM -> WAV decoding. Freshly decoded audio
 * is written to the on-disk cache and to the in-memory WAV cache.
 *
 * @param sessionId - Session the message belongs to; also used as the first lookup candidate.
 * @param msgId - Local message id as a decimal string.
 * @param createTime - Message timestamp if the caller already has it; otherwise it is read from the message record.
 * @param serverId - Server-side message id if known. Falls back to 0; it is only read from the
 *   message record when createTime has to be looked up as well.
 * @returns `{ success: true, data }` with the base64 WAV, or `{ success: false, error }`.
 */
async getVoiceData(sessionId: string, msgId: string, createTime?: number, serverId?: string | number): Promise<{ success: boolean; data?: string; error?: string }> {
  try {
    const localId = parseInt(msgId, 10)
    if (isNaN(localId)) {
      return { success: false, error: '无效的消息ID' }
    }

    // Size of the RIFF/WAVE header that createWavBuffer prepends to the samples.
    const wavHeaderBytes = 44

    // Check the file cache first.
    const cacheKey = this.getVoiceCacheKey(sessionId, msgId)
    const cachedFile = this.getVoiceCacheFilePath(cacheKey)
    if (existsSync(cachedFile)) {
      try {
        const cachedWav = readFileSync(cachedFile)
        // A file that is not larger than the header carries no audio (for
        // example an interrupted write). Serving it would report success with
        // unusable data forever, so treat it as a miss and decode again.
        if (cachedWav.length > wavHeaderBytes) {
          console.info('[ChatService][Voice] 使用缓存文件:', cachedFile)
          return { success: true, data: cachedWav.toString('base64') }
        }
        console.warn('[ChatService][Voice] 缓存文件无效,重新解码:', cachedFile)
      } catch (e) {
        console.error('[ChatService][Voice] 读取缓存失败:', e)
        // Fall through and decode again.
      }
    }

    // 1. Resolve createTime and the server id.
    let msgCreateTime = createTime
    let msgSvrId: string | number = serverId || 0

    // Only hit the database when the caller did not supply a usable timestamp.
    if (!msgCreateTime) {
      const msgResult = await this.getMessageByLocalId(sessionId, localId)
      if (msgResult.success && msgResult.message) {
        const msg = msgResult.message as any
        msgCreateTime = msg.createTime || msg.create_time
        // Try the known server-id column names, but never override a caller-supplied id.
        if (!msgSvrId) {
          msgSvrId = msg.serverId || msg.svr_id || msg.msg_svr_id || msg.message_id || 0
        }
      }
    }

    if (!msgCreateTime) {
      return { success: false, error: '未找到消息时间戳' }
    }

    // 2. Build the lookup candidates (sessionId, then the user's own wxid).
    const candidates: string[] = []
    if (sessionId) candidates.push(sessionId)
    const myWxid = this.configService.get('myWxid') as string
    if (myWxid && !candidates.includes(myWxid)) {
      candidates.push(myWxid)
    }

    // 3. Ask the native interface for the voice payload (hex string).
    const voiceRes = await wcdbService.getVoiceData(sessionId, msgCreateTime, candidates, msgSvrId)
    if (!voiceRes.success || !voiceRes.hex) {
      return { success: false, error: voiceRes.error || '未找到语音数据' }
    }

    // 4. Hex -> Buffer (Silk).
    const silkData = Buffer.from(voiceRes.hex, 'hex')

    // 5. Decode with silk-wasm.
    try {
      const pcmData = await this.decodeSilkToPcm(silkData, 24000)
      // An empty PCM buffer is truthy, so check the length explicitly; otherwise
      // a header-only WAV would be produced and persisted to the cache.
      if (!pcmData || pcmData.length === 0) {
        return { success: false, error: 'Silk 解码失败' }
      }

      // PCM -> WAV
      const wavData = this.createWavBuffer(pcmData, 24000)

      // Persist to the file cache; a failure here must not fail the request.
      try {
        this.saveVoiceCache(cacheKey, wavData)
        console.info('[ChatService][Voice] 已保存缓存:', cachedFile)
      } catch (e) {
        console.error('[ChatService][Voice] 保存缓存失败:', e)
      }

      // Keep the WAV in the in-memory cache as well.
      this.cacheVoiceWav(cacheKey, wavData)

      return { success: true, data: wavData.toString('base64') }
    } catch (e) {
      console.error('[ChatService][Voice] decoding error:', e)
      return { success: false, error: '语音解码失败: ' + String(e) }
    }
  } catch (e) {
    console.error('ChatService: getVoiceData 失败:', e)
    return { success: false, error: String(e) }
  }
}
async getVoiceData_Legacy(sessionId: string, msgId: string): Promise<{ success: boolean; data?: string; error?: string }> {
try {
const localId = parseInt(msgId, 10)
const msgResult = await this.getMessageByLocalId(sessionId, localId)
@@ -2187,12 +2283,10 @@ class ChatService {
for (const dbPath of (mediaDbs.data || [])) {
const voiceTable = await this.resolveVoiceInfoTableName(dbPath)
if (!voiceTable) {
console.warn('[ChatService][Voice] voice table not found', dbPath)
continue
}
const columns = await this.resolveVoiceInfoColumns(dbPath, voiceTable)
if (!columns) {
console.warn('[ChatService][Voice] voice columns not found', { dbPath, voiceTable })
continue
}
for (const candidate of candidates) {
@@ -2233,52 +2327,44 @@ class ChatService {
}
}
if (silkData) break
// 策略 3: 只使用 CreateTime (兜底)
if (!silkData && columns.createTimeColumn) {
const whereClause = `${columns.createTimeColumn} = ${msg.createTime}`
const sql = `SELECT ${columns.dataColumn} AS data FROM ${voiceTable} WHERE ${whereClause} LIMIT 1`
const result = await wcdbService.execQuery('media', dbPath, sql)
if (result.success && result.rows && result.rows.length > 0) {
const raw = result.rows[0]?.data
const decoded = this.decodeVoiceBlob(raw)
if (decoded && decoded.length > 0) {
console.info('[ChatService][Voice] hit by createTime only', { dbPath, voiceTable, whereClause, bytes: decoded.length })
silkData = decoded
}
}
}
if (silkData) break
}
if (!silkData) return { success: false, error: '未找到语音数据' }
// 4. 解码 Silk -> PCM -> WAV
const resourcesPath = app.isPackaged
? join(process.resourcesPath, 'resources')
: join(app.getAppPath(), 'resources')
const decoderPath = join(resourcesPath, 'silk_v3_decoder.exe')
if (!existsSync(decoderPath)) {
return { success: false, error: '找不到语音解码器 (silk_v3_decoder.exe)' }
}
console.info('[ChatService][Voice] decoder path', decoderPath)
const tempDir = app.getPath('temp')
const silkFile = join(tempDir, `voice_${msgId}.silk`)
const pcmFile = join(tempDir, `voice_${msgId}.pcm`)
// 4. 使用 silk-wasm 解码
try {
writeFileSync(silkFile, silkData)
// 执行解码: silk_v3_decoder.exe <silk> <pcm> -Fs_API 24000
console.info('[ChatService][Voice] executing decoder:', decoderPath, [silkFile, pcmFile])
const { stdout, stderr } = await execFileAsync(
decoderPath,
[silkFile, pcmFile, '-Fs_API', '24000'],
{ cwd: dirname(decoderPath) }
)
if (stdout && stdout.trim()) console.info('[ChatService][Voice] decoder stdout:', stdout)
if (stderr && stderr.trim()) console.warn('[ChatService][Voice] decoder stderr:', stderr)
if (!existsSync(pcmFile)) {
return { success: false, error: '语音解码失败' }
const pcmData = await this.decodeSilkToPcm(silkData, 24000)
if (!pcmData) {
return { success: false, error: 'Silk 解码失败' }
}
const pcmData = readFileSync(pcmFile)
const wavHeader = this.createWavHeader(pcmData.length, 24000, 1) // 微信语音通常 24kHz
const wavData = Buffer.concat([wavHeader, pcmData])
// PCM -> WAV
const wavData = this.createWavBuffer(pcmData, 24000)
// 缓存 WAV 数据 (内存缓存)
const cacheKey = this.getVoiceCacheKey(sessionId, msgId)
this.cacheVoiceWav(cacheKey, wavData)
return { success: true, data: wavData.toString('base64') }
} finally {
// 清理临时文件
try { if (existsSync(silkFile)) unlinkSync(silkFile) } catch { }
try { if (existsSync(pcmFile)) unlinkSync(pcmFile) } catch { }
} catch (e) {
console.error('[ChatService][Voice] decoding error:', e)
return { success: false, error: '语音解码失败: ' + String(e) }
}
} catch (e) {
console.error('ChatService: getVoiceData 失败:', e)
@@ -2286,7 +2372,69 @@ class ChatService {
}
}
async getVoiceTranscript(sessionId: string, msgId: string): Promise<{ success: boolean; transcript?: string; error?: string }> {
/**
 * Decodes Silk audio into raw PCM using the silk-wasm module.
 *
 * The location of silk.wasm is resolved and checked for existence before the
 * module is loaded; the resolved path is used for that check only and is not
 * handed to the decoder.
 *
 * @param silkData - Silk-encoded audio.
 * @param sampleRate - Sample rate passed to the decoder.
 * @returns The decoded PCM bytes, or null when the wasm file or module is
 *   unavailable or decoding throws.
 */
private async decodeSilkToPcm(silkData: Buffer, sampleRate: number): Promise<Buffer | null> {
  try {
    const wasmTail = ['node_modules', 'silk-wasm', 'lib', 'silk.wasm']

    let wasmPath: string
    if (!app.isPackaged) {
      wasmPath = join(app.getAppPath(), ...wasmTail)
    } else {
      // Packaged builds: prefer the asar-unpacked copy, then the plain resources copy.
      const unpackedPath = join(process.resourcesPath, 'app.asar.unpacked', ...wasmTail)
      wasmPath = existsSync(unpackedPath)
        ? unpackedPath
        : join(process.resourcesPath, ...wasmTail)
    }

    if (!existsSync(wasmPath)) {
      console.error('[ChatService][Voice] silk.wasm not found at:', wasmPath)
      return null
    }

    const silkWasm = require('silk-wasm')
    if (!silkWasm?.decode) {
      console.error('[ChatService][Voice] silk-wasm module invalid')
      return null
    }

    const decoded = await silkWasm.decode(silkData, sampleRate)
    return Buffer.from(decoded.data)
  } catch (e) {
    console.error('[ChatService][Voice] internal decode error:', e)
    return null
  }
}
/**
 * Builds a complete WAV buffer: a 44-byte RIFF/WAVE header for 16-bit PCM
 * followed by the supplied samples.
 *
 * @param pcmData - Raw PCM bytes placed after the header.
 * @param sampleRate - Sample rate written to the header (default 24000).
 * @param channels - Channel count written to the header (default 1).
 * @returns Header and PCM data concatenated.
 */
private createWavBuffer(pcmData: Buffer, sampleRate: number = 24000, channels: number = 1): Buffer {
  const dataBytes = pcmData.length
  const wavHeader = Buffer.alloc(44)

  // RIFF chunk descriptor
  wavHeader.write('RIFF', 0)
  wavHeader.writeUInt32LE(36 + dataBytes, 4) // bytes remaining after this field
  wavHeader.write('WAVE', 8)

  // "fmt " sub-chunk
  wavHeader.write('fmt ', 12)
  wavHeader.writeUInt32LE(16, 16) // sub-chunk size
  wavHeader.writeUInt16LE(1, 20) // audio format 1 = PCM
  wavHeader.writeUInt16LE(channels, 22)
  wavHeader.writeUInt32LE(sampleRate, 24)
  wavHeader.writeUInt32LE(sampleRate * channels * 2, 28) // byte rate
  wavHeader.writeUInt16LE(channels * 2, 32) // block align
  wavHeader.writeUInt16LE(16, 34) // bits per sample

  // "data" sub-chunk
  wavHeader.write('data', 36)
  wavHeader.writeUInt32LE(dataBytes, 40)

  return Buffer.concat([wavHeader, pcmData])
}
async getVoiceTranscript(
sessionId: string,
msgId: string,
onPartial?: (text: string) => void
): Promise<{ success: boolean; transcript?: string; error?: string }> {
const cacheKey = this.getVoiceCacheKey(sessionId, msgId)
const cached = this.voiceTranscriptCache.get(cacheKey)
if (cached) {
@@ -2302,14 +2450,25 @@ class ChatService {
try {
let wavData = this.voiceWavCache.get(cacheKey)
if (!wavData) {
const voiceResult = await this.getVoiceData(sessionId, msgId)
// 获取消息详情以拿到 createTime 和 serverId
let cTime: number | undefined
let sId: string | number | undefined
const msgResult = await this.getMessageById(sessionId, parseInt(msgId, 10))
if (msgResult.success && msgResult.message) {
cTime = msgResult.message.createTime
sId = msgResult.message.serverId
}
const voiceResult = await this.getVoiceData(sessionId, msgId, cTime, sId)
if (!voiceResult.success || !voiceResult.data) {
return { success: false, error: voiceResult.error || '语音解码失败' }
}
wavData = Buffer.from(voiceResult.data, 'base64')
}
const result = await voiceTranscribeService.transcribeWavBuffer(wavData)
const result = await voiceTranscribeService.transcribeWavBuffer(wavData, (text) => {
onPartial?.(text)
})
if (result.success && result.transcript) {
this.cacheVoiceTranscript(cacheKey, result.transcript)
}
@@ -2325,26 +2484,10 @@ class ChatService {
return task
}
/**
 * Builds the 44-byte RIFF/WAVE header for 16-bit PCM audio.
 *
 * @param pcmLength - Size in bytes of the PCM payload the header describes.
 * @param sampleRate - Sample rate written to the header (default 24000).
 * @param channels - Channel count written to the header (default 1).
 * @returns The header only; the caller appends the PCM data.
 */
private createWavHeader(pcmLength: number, sampleRate: number = 24000, channels: number = 1): Buffer {
  const wavHeader = Buffer.alloc(44)

  // Four-character chunk tags at their fixed offsets.
  const chunkTags: Array<[string, number]> = [
    ['RIFF', 0],
    ['WAVE', 8],
    ['fmt ', 12],
    ['data', 36]
  ]
  for (const [tag, offset] of chunkTags) {
    wavHeader.write(tag, offset)
  }

  // Numeric fields, little-endian.
  wavHeader.writeUInt32LE(36 + pcmLength, 4) // bytes remaining after this field
  wavHeader.writeUInt32LE(16, 16) // "fmt " sub-chunk size
  wavHeader.writeUInt16LE(1, 20) // audio format 1 = PCM
  wavHeader.writeUInt16LE(channels, 22)
  wavHeader.writeUInt32LE(sampleRate, 24)
  wavHeader.writeUInt32LE(sampleRate * channels * 2, 28) // byte rate
  wavHeader.writeUInt16LE(channels * 2, 32) // block align
  wavHeader.writeUInt16LE(16, 34) // bits per sample
  wavHeader.writeUInt32LE(pcmLength, 40) // "data" sub-chunk size

  return wavHeader
}
/**
 * Builds the cache key for one voice message.
 *
 * The key is also used as a file name by getVoiceCacheFilePath, so the parts
 * are joined with '_' instead of ':' (a colon is not allowed in Windows file
 * names).
 *
 * @param sessionId - Session the message belongs to.
 * @param msgId - Local message id.
 * @returns `<sessionId>_<msgId>`.
 */
private getVoiceCacheKey(sessionId: string, msgId: string): string {
  return `${sessionId}_${msgId}`
}
private cacheVoiceWav(cacheKey: string, wavData: Buffer): void {
@@ -2355,6 +2498,32 @@ class ChatService {
}
}
/**
 * Resolves the on-disk cache file for a voice cache key, creating the cache
 * directory when it does not exist yet.
 *
 * The directory is `<cachePath>/Voices` when `cachePath` is configured, and
 * `<documents>/WeFlow/Voices` otherwise.
 *
 * @param cacheKey - Voice cache key; used as the file name after sanitizing.
 * @returns Absolute path of the `.wav` cache file.
 */
private getVoiceCacheFilePath(cacheKey: string): string {
  const cachePath = this.configService.get('cachePath') as string | undefined
  const baseDir = cachePath && cachePath.trim()
    ? join(cachePath, 'Voices')
    : join(app.getPath('documents'), 'WeFlow', 'Voices')

  if (!existsSync(baseDir)) {
    mkdirSync(baseDir, { recursive: true })
  }

  // The key is built from caller-supplied ids. Replace path separators,
  // characters reserved in Windows file names and control characters so the
  // result always stays a single file name inside baseDir.
  const safeName = cacheKey.replace(/[\\/:*?"<>|\x00-\x1f]/g, '_')
  return join(baseDir, `${safeName}.wav`)
}
/**
 * Writes a WAV buffer to the voice file cache.
 *
 * @param cacheKey - Voice cache key that determines the target file.
 * @param wavData - Complete WAV bytes to store.
 */
private saveVoiceCache(cacheKey: string, wavData: Buffer): void {
  writeFileSync(this.getVoiceCacheFilePath(cacheKey), wavData)
}
private cacheVoiceTranscript(cacheKey: string, transcript: string): void {
this.voiceTranscriptCache.set(cacheKey, transcript)
if (this.voiceTranscriptCache.size > this.voiceCacheMaxEntries) {