feat: 尝试增加一下聊天里面的语音转文字功能

This commit is contained in:
xuncha
2026-01-17 05:14:14 +08:00
parent 095c8f0db6
commit 72e2d82158
18 changed files with 999 additions and 66 deletions

View File

@@ -15,6 +15,7 @@ import { groupAnalyticsService } from './services/groupAnalyticsService'
import { annualReportService } from './services/annualReportService'
import { exportService, ExportOptions } from './services/exportService'
import { KeyService } from './services/keyService'
import { voiceTranscribeService } from './services/voiceTranscribeService'
// 配置自动更新
@@ -442,6 +443,10 @@ function registerIpcHandlers() {
return chatService.getVoiceData(sessionId, msgId)
})
// IPC: transcribe one voice message; resolves with whatever chatService.getVoiceTranscript returns.
ipcMain.handle(
  'chat:getVoiceTranscript',
  async (_event, sessionId: string, msgId: string) => chatService.getVoiceTranscript(sessionId, msgId)
)
// IPC: look up a single message of a session by its local id.
ipcMain.handle(
  'chat:getMessageById',
  async (_event, sessionId: string, localId: number) => chatService.getMessageById(sessionId, localId)
)
@@ -516,6 +521,16 @@ function registerIpcHandlers() {
return { success: true }
})
// IPC: download a whisper model and stream progress back to the requesting renderer
// on the 'whisper:downloadProgress' channel.
ipcMain.handle('whisper:downloadModel', async (event, payload: { modelName: string; downloadDir?: string; source?: string }) => {
  const requester = event.sender
  return voiceTranscribeService.downloadModel(payload, (progress) => {
    // The window can be closed while a large model is still downloading. Sending to a
    // destroyed WebContents throws, and this callback runs inside a stream event handler,
    // so the exception would be uncaught in the main process.
    if (!requester.isDestroyed()) {
      requester.send('whisper:downloadProgress', progress)
    }
  })
})
// IPC: report whether the requested whisper model file is present on disk.
ipcMain.handle(
  'whisper:getModelStatus',
  async (_event, payload: { modelName: string; downloadDir?: string }) => voiceTranscribeService.getModelStatus(payload)
)
// 群聊分析相关
ipcMain.handle('groupAnalytics:getGroupChats', async () => {
return groupAnalyticsService.getGroupChats()

View File

@@ -106,7 +106,8 @@ contextBridge.exposeInMainWorld('electronAPI', {
close: () => ipcRenderer.invoke('chat:close'),
getSessionDetail: (sessionId: string) => ipcRenderer.invoke('chat:getSessionDetail', sessionId),
getImageData: (sessionId: string, msgId: string) => ipcRenderer.invoke('chat:getImageData', sessionId, msgId),
getVoiceData: (sessionId: string, msgId: string) => ipcRenderer.invoke('chat:getVoiceData', sessionId, msgId)
getVoiceData: (sessionId: string, msgId: string) => ipcRenderer.invoke('chat:getVoiceData', sessionId, msgId),
getVoiceTranscript: (sessionId: string, msgId: string) => ipcRenderer.invoke('chat:getVoiceTranscript', sessionId, msgId)
},
@@ -174,5 +175,16 @@ contextBridge.exposeInMainWorld('electronAPI', {
ipcRenderer.invoke('export:exportSessions', sessionIds, outputDir, options),
exportSession: (sessionId: string, outputPath: string, options: any) =>
ipcRenderer.invoke('export:exportSession', sessionId, outputPath, options)
},
whisper: {
  // Starts (or joins) a model download in the main process.
  downloadModel: (payload: { modelName: string; downloadDir?: string; source?: string }) =>
    ipcRenderer.invoke('whisper:downloadModel', payload),
  // Asks the main process whether the model file exists.
  getModelStatus: (payload: { modelName: string; downloadDir?: string }) =>
    ipcRenderer.invoke('whisper:getModelStatus', payload),
  /**
   * Subscribes to download progress events.
   * @returns an unsubscribe function that removes only this subscription.
   */
  onDownloadProgress: (callback: (payload: { modelName: string; downloadedBytes: number; totalBytes?: number; percent?: number }) => void) => {
    const listener = (
      _event: unknown,
      payload: { modelName: string; downloadedBytes: number; totalBytes?: number; percent?: number }
    ) => callback(payload)
    ipcRenderer.on('whisper:downloadProgress', listener)
    return () => {
      // Remove just this listener: removeAllListeners would also silence every other
      // subscriber on the channel. Block body so ipcRenderer itself is not returned
      // across the context bridge.
      ipcRenderer.removeListener('whisper:downloadProgress', listener)
    }
  }
}
})

View File

@@ -16,6 +16,7 @@ import { ConfigService } from './config'
import { wcdbService } from './wcdbService'
import { MessageCacheService } from './messageCacheService'
import { ContactCacheService, ContactCacheEntry } from './contactCacheService'
import { voiceTranscribeService } from './voiceTranscribeService'
type HardlinkState = {
db: Database.Database
@@ -83,6 +84,10 @@ class ChatService {
private hardlinkCache = new Map<string, HardlinkState>()
private readonly contactCacheService: ContactCacheService
private readonly messageCacheService: MessageCacheService
// Decoded WAV buffers, keyed by the string getVoiceCacheKey builds (`${sessionId}:${msgId}`).
private voiceWavCache = new Map<string, Buffer>()
// Finished transcripts, keyed the same way as voiceWavCache.
private voiceTranscriptCache = new Map<string, string>()
// Transcriptions currently running, so repeated requests for the same message share one promise.
private voiceTranscriptPending = new Map<string, Promise<{ success: boolean; transcript?: string; error?: string }>>()
// Size cap applied separately to voiceWavCache and voiceTranscriptCache.
private readonly voiceCacheMaxEntries = 50
constructor() {
this.configService = new ConfigService()
@@ -1738,6 +1743,9 @@ class ChatService {
if (includeMessages) {
this.messageCacheService.clear()
this.voiceWavCache.clear()
this.voiceTranscriptCache.clear()
this.voiceTranscriptPending.clear()
}
for (const state of this.hardlinkCache.values()) {
@@ -2263,6 +2271,8 @@ class ChatService {
const pcmData = readFileSync(pcmFile)
const wavHeader = this.createWavHeader(pcmData.length, 24000, 1) // 微信语音通常 24kHz
const wavData = Buffer.concat([wavHeader, pcmData])
const cacheKey = this.getVoiceCacheKey(sessionId, msgId)
this.cacheVoiceWav(cacheKey, wavData)
return { success: true, data: wavData.toString('base64') }
} finally {
@@ -2276,6 +2286,45 @@ class ChatService {
}
}
/**
 * Returns the transcript of a voice message.
 *
 * Lookup order: cached transcript, then an already-running transcription for the
 * same message, then a fresh run. A fresh run reuses the cached WAV when present,
 * otherwise decodes the voice via getVoiceData first. Failures are reported through
 * the result object rather than thrown.
 *
 * @param sessionId session the message belongs to
 * @param msgId id of the voice message
 * @returns `{ success, transcript? , error? }`
 */
async getVoiceTranscript(sessionId: string, msgId: string): Promise<{ success: boolean; transcript?: string; error?: string }> {
  const key = this.getVoiceCacheKey(sessionId, msgId)

  const knownTranscript = this.voiceTranscriptCache.get(key)
  if (knownTranscript) {
    return { success: true, transcript: knownTranscript }
  }

  const inFlight = this.voiceTranscriptPending.get(key)
  if (inFlight) {
    return inFlight
  }

  const run = async (): Promise<{ success: boolean; transcript?: string; error?: string }> => {
    try {
      let wav = this.voiceWavCache.get(key)
      if (!wav) {
        const decoded = await this.getVoiceData(sessionId, msgId)
        if (!(decoded.success && decoded.data)) {
          return { success: false, error: decoded.error || '语音解码失败' }
        }
        wav = Buffer.from(decoded.data, 'base64')
      }

      const outcome = await voiceTranscribeService.transcribeWavBuffer(wav)
      // Only non-empty successful transcripts are remembered.
      if (outcome.success && outcome.transcript) {
        this.cacheVoiceTranscript(key, outcome.transcript)
      }
      return outcome
    } catch (error) {
      return { success: false, error: String(error) }
    } finally {
      // Always drop the in-flight marker so a later call can retry.
      this.voiceTranscriptPending.delete(key)
    }
  }

  const job = run()
  this.voiceTranscriptPending.set(key, job)
  return job
}
private createWavHeader(pcmLength: number, sampleRate: number = 24000, channels: number = 1): Buffer {
const header = Buffer.alloc(44)
header.write('RIFF', 0)
@@ -2294,6 +2343,26 @@ class ChatService {
return header
}
/** Builds the key shared by the voice caches: session id and message id joined by ':'. */
private getVoiceCacheKey(sessionId: string, msgId: string): string {
  return [sessionId, msgId].join(':')
}
/**
 * Stores a decoded WAV buffer and, when the cache grows past voiceCacheMaxEntries,
 * evicts the first key in the map's iteration order.
 */
private cacheVoiceWav(cacheKey: string, wavData: Buffer): void {
  const store = this.voiceWavCache
  store.set(cacheKey, wavData)
  if (store.size <= this.voiceCacheMaxEntries) return

  // A Map iterates in insertion order, so the first key is the earliest-inserted one.
  const first = store.keys().next()
  if (!first.done && first.value) {
    store.delete(first.value)
  }
}
/**
 * Stores a transcript and, when the cache grows past voiceCacheMaxEntries,
 * evicts the first key in the map's iteration order.
 */
private cacheVoiceTranscript(cacheKey: string, transcript: string): void {
  const store = this.voiceTranscriptCache
  store.set(cacheKey, transcript)
  if (store.size <= this.voiceCacheMaxEntries) return

  // A Map iterates in insertion order, so the first key is the earliest-inserted one.
  const first = store.keys().next()
  if (!first.done && first.value) {
    store.delete(first.value)
  }
}
async getMessageById(sessionId: string, localId: number): Promise<{ success: boolean; message?: Message; error?: string }> {
try {
console.info('[ChatService] getMessageById (SQL)', { sessionId, localId })

View File

@@ -20,6 +20,9 @@ interface ConfigSchema {
language: string
logEnabled: boolean
llmModelPath: string
whisperModelName: string
whisperModelDir: string
whisperDownloadSource: string
}
export class ConfigService {
@@ -42,7 +45,10 @@ export class ConfigService {
themeId: 'cloud-dancer',
language: 'zh-CN',
logEnabled: false,
llmModelPath: ''
llmModelPath: '',
whisperModelName: 'base',
whisperModelDir: '',
whisperDownloadSource: 'tsinghua'
}
})
}

View File

@@ -0,0 +1,281 @@
import { app } from 'electron'
import { createWriteStream, existsSync, mkdirSync, readFileSync, renameSync, statSync, unlinkSync, writeFileSync } from 'fs'
import { join, dirname } from 'path'
import { promisify } from 'util'
import { execFile, spawnSync } from 'child_process'
import * as https from 'https'
import * as http from 'http'
import { ConfigService } from './config'
// Promise-returning form of child_process.execFile; callers below await it and read `stdout` from the result.
const execFileAsync = promisify(execFile)
// Describes one downloadable whisper model.
type WhisperModelInfo = {
// Model identifier; matches its key in WHISPER_MODELS.
name: string
// File name appended to the download base URL and used as the on-disk file name.
fileName: string
// Human-readable size, e.g. '142 MB'.
sizeLabel: string
// Size in bytes; the values in WHISPER_MODELS are round figures matching sizeLabel.
sizeBytes?: number
}
// Progress snapshot passed to the onProgress callback for each received chunk.
type DownloadProgress = {
// Name of the model being downloaded.
modelName: string
// Bytes received so far.
downloadedBytes: number
// Parsed from the Content-Length response header; undefined when absent or zero.
totalBytes?: number
// downloadedBytes / totalBytes * 100; undefined when totalBytes is unknown.
percent?: number
}
// Models this service knows about, keyed by model name.
// Any name that is not a key here is rejected as an unknown model by the service.
// NOTE(review): sizeBytes are round decimal figures mirroring sizeLabel, presumably approximations — do not use them for integrity checks without confirming.
const WHISPER_MODELS: Record<string, WhisperModelInfo> = {
tiny: { name: 'tiny', fileName: 'ggml-tiny.bin', sizeLabel: '75 MB', sizeBytes: 75_000_000 },
base: { name: 'base', fileName: 'ggml-base.bin', sizeLabel: '142 MB', sizeBytes: 142_000_000 },
small: { name: 'small', fileName: 'ggml-small.bin', sizeLabel: '466 MB', sizeBytes: 466_000_000 },
medium: { name: 'medium', fileName: 'ggml-medium.bin', sizeLabel: '1.5 GB', sizeBytes: 1_500_000_000 },
'large-v3': { name: 'large-v3', fileName: 'ggml-large-v3.bin', sizeLabel: '2.9 GB', sizeBytes: 2_900_000_000 }
}
// Download base URLs keyed by source id; the model file name is appended as `${baseUrl}/${fileName}`.
// NOTE(review): the 'tsinghua' key resolves to hf-mirror.com rather than a Tsinghua-hosted mirror — confirm the
// intended naming before renaming, since the key is also a stored config value.
const WHISPER_SOURCES: Record<string, string> = {
official: 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main',
tsinghua: 'https://hf-mirror.com/ggerganov/whisper.cpp/resolve/main'
}
/**
 * Locates the ffmpeg binary shipped with the `ffmpeg-static` package.
 *
 * Candidates are tried in this order and the first one that exists on disk wins:
 *   1. the path exported by `require('ffmpeg-static')`
 *   2. `<cwd>/node_modules/ffmpeg-static/<binary>` (development checkout)
 *   3. `<resources>/app.asar.unpacked/node_modules/ffmpeg-static/<binary>` (packaged app only)
 *
 * @returns absolute path of an existing binary, or null when none is found
 */
function getStaticFfmpegPath(): string | null {
  // The binary carries an `.exe` suffix only on Windows.
  const binaryName = process.platform === 'win32' ? 'ffmpeg.exe' : 'ffmpeg'
  const candidates: string[] = []

  try {
    // eslint-disable-next-line @typescript-eslint/no-var-requires
    const ffmpegStatic: unknown = require('ffmpeg-static')
    if (typeof ffmpegStatic === 'string') {
      candidates.push(ffmpegStatic)
    }
  } catch {
    // The module could not be resolved. Keep going: the fixed locations below exist
    // precisely for the case where the module's own path is unusable.
  }

  try {
    candidates.push(join(process.cwd(), 'node_modules', 'ffmpeg-static', binaryName))
    if (app.isPackaged) {
      candidates.push(
        join(process.resourcesPath, 'app.asar.unpacked', 'node_modules', 'ffmpeg-static', binaryName)
      )
    }
    return candidates.find((candidate) => existsSync(candidate)) ?? null
  } catch {
    return null
  }
}
/**
 * Manages whisper model files (status lookup and download) and transcribes WAV audio
 * by running ffmpeg followed by the bundled `whisper-cli.exe`.
 *
 * Public methods report failures through `{ success: false, error }` instead of throwing.
 */
export class VoiceTranscribeService {
  private configService = new ConfigService()
  /** Downloads currently running, keyed by `${modelName}:${modelPath}`, so repeated requests share one transfer. */
  private downloadTasks = new Map<string, Promise<{ success: boolean; path?: string; error?: string }>>()

  /** Returns the catalogue entry for a model name, or null when the name is unknown. */
  private resolveModelInfo(modelName: string): WhisperModelInfo | null {
    // Own-property check: a plain index lookup would accept inherited keys such as
    // "constructor" and hand back a non-model value.
    if (!Object.prototype.hasOwnProperty.call(WHISPER_MODELS, modelName)) return null
    return WHISPER_MODELS[modelName] ?? null
  }

  /** Picks the model directory: explicit override, then configured directory, then `<userData>/models/whisper`. */
  private resolveModelDir(overrideDir?: string): string {
    const configured = overrideDir || this.configService.get('whisperModelDir')
    if (configured) return configured
    return join(app.getPath('userData'), 'models', 'whisper')
  }

  /** Full path of the model file, or null when the model name is unknown. */
  private resolveModelPath(modelName: string, overrideDir?: string): string | null {
    const info = this.resolveModelInfo(modelName)
    if (!info) return null
    return join(this.resolveModelDir(overrideDir), info.fileName)
  }

  /** Base URL for downloads: explicit override, then configured source, falling back to the official one. */
  private resolveSourceUrl(overrideSource?: string): string {
    const configured = overrideSource || this.configService.get('whisperDownloadSource')
    if (configured && Object.prototype.hasOwnProperty.call(WHISPER_SOURCES, configured)) {
      return WHISPER_SOURCES[configured] || WHISPER_SOURCES.official
    }
    return WHISPER_SOURCES.official
  }

  /**
   * Reports whether a model file is present.
   * @param payload model name and optional directory override
   * @returns `exists` plus the resolved `path`; `sizeBytes` is the on-disk size when the file exists
   */
  async getModelStatus(payload: { modelName: string; downloadDir?: string }): Promise<{
    success: boolean
    exists?: boolean
    path?: string
    sizeBytes?: number
    error?: string
  }> {
    const modelPath = this.resolveModelPath(payload.modelName, payload.downloadDir)
    if (!modelPath) {
      return { success: false, error: '未知模型名称' }
    }
    if (!existsSync(modelPath)) {
      return { success: true, exists: false, path: modelPath }
    }
    try {
      const sizeBytes = statSync(modelPath).size
      return { success: true, exists: true, path: modelPath, sizeBytes }
    } catch (error) {
      // The file can vanish or be unreadable between the existence check and the stat.
      return { success: false, error: String(error) }
    }
  }

  /**
   * Downloads a model unless it is already on disk.
   *
   * A request for a model that is already downloading joins the running transfer;
   * in that case only the first caller's `onProgress` receives updates.
   *
   * @param payload model name plus optional directory and source overrides
   * @param onProgress invoked for every received chunk
   * @returns the final model path on success
   */
  async downloadModel(
    payload: { modelName: string; downloadDir?: string; source?: string },
    onProgress?: (progress: DownloadProgress) => void
  ): Promise<{ success: boolean; path?: string; error?: string }> {
    const info = this.resolveModelInfo(payload.modelName)
    if (!info) {
      return { success: false, error: '未知模型名称' }
    }

    const modelPath = this.resolveModelPath(payload.modelName, payload.downloadDir)
    if (!modelPath) {
      return { success: false, error: '模型路径生成失败' }
    }

    if (existsSync(modelPath)) {
      return { success: true, path: modelPath }
    }

    const cacheKey = `${payload.modelName}:${modelPath}`
    const pending = this.downloadTasks.get(cacheKey)
    if (pending) return pending

    // Clean-up is attached with .finally so it always runs after the entry below has been
    // registered. Doing it inside the task's own try/finally can run before registration
    // when the task fails synchronously, leaving a stale failed promise in the map.
    const task = this.fetchModel(info, modelPath, payload, onProgress).finally(() => {
      this.downloadTasks.delete(cacheKey)
    })
    this.downloadTasks.set(cacheKey, task)
    return task
  }

  /** Downloads into `<modelPath>.part` and renames on success, so the final path only ever holds a complete file. */
  private async fetchModel(
    info: WhisperModelInfo,
    modelPath: string,
    payload: { modelName: string; downloadDir?: string; source?: string },
    onProgress?: (progress: DownloadProgress) => void
  ): Promise<{ success: boolean; path?: string; error?: string }> {
    const partialPath = `${modelPath}.part`
    try {
      const targetDir = this.resolveModelDir(payload.downloadDir)
      if (!existsSync(targetDir)) {
        mkdirSync(targetDir, { recursive: true })
      }

      const url = `${this.resolveSourceUrl(payload.source)}/${info.fileName}`
      await this.downloadToFile(url, partialPath, payload.modelName, onProgress)
      renameSync(partialPath, modelPath)
      return { success: true, path: modelPath }
    } catch (error) {
      // Best-effort removal of the incomplete file; a leftover is overwritten by the next attempt.
      try { if (existsSync(partialPath)) unlinkSync(partialPath) } catch { }
      return { success: false, error: String(error) }
    }
  }

  /**
   * Transcribes a WAV buffer using the model named in config (default 'base').
   *
   * The audio is written to a temp file, resampled to 16 kHz mono with ffmpeg, then passed
   * to `whisper-cli.exe` with language `zh`. All temp files are removed afterwards.
   *
   * @param wavData complete WAV file contents
   * @returns the transcript text, or an error description
   */
  async transcribeWavBuffer(wavData: Buffer): Promise<{ success: boolean; transcript?: string; error?: string }> {
    const modelName = this.configService.get('whisperModelName') || 'base'
    const modelPath = this.resolveModelPath(modelName)
    console.info('[VoiceTranscribe] check model', { modelName, modelPath, exists: modelPath ? existsSync(modelPath) : false })
    if (!modelPath || !existsSync(modelPath)) {
      return { success: false, error: '未下载语音模型,请在设置中下载' }
    }

    // Use the bundled, prebuilt whisper-cli.exe.
    const resourcesPath = app.isPackaged
      ? join(process.resourcesPath, 'resources')
      : join(app.getAppPath(), 'resources')
    const whisperExe = join(resourcesPath, 'whisper-cli.exe')
    if (!existsSync(whisperExe)) {
      return { success: false, error: '找不到语音转写程序,请重新安装应用' }
    }

    // Fall back to whatever `ffmpeg` is on PATH when no bundled binary is found.
    const ffmpegPath = getStaticFfmpegPath() || 'ffmpeg'
    console.info('[VoiceTranscribe] ffmpeg path', ffmpegPath)

    const tempDir = app.getPath('temp')
    const fileToken = `${Date.now()}_${Math.random().toString(16).slice(2)}`
    const inputPath = join(tempDir, `weflow_voice_${fileToken}.wav`)
    const outputPath = join(tempDir, `weflow_voice_${fileToken}_16k.wav`)
    // Where `-otxt` may put the text file: appended to the full input name, or with the
    // extension replaced. NOTE(review): upstream whisper.cpp appends `.txt` to the input
    // file name — confirm for the bundled binary. Both are checked and both are cleaned up.
    const transcriptFiles = [`${outputPath}.txt`, outputPath.replace(/\.[^.]+$/, '.txt')]

    try {
      writeFileSync(inputPath, wavData)

      console.info('[VoiceTranscribe] converting to 16kHz', { inputPath, outputPath })
      await execFileAsync(ffmpegPath, ['-y', '-i', inputPath, '-ar', '16000', '-ac', '1', outputPath])

      console.info('[VoiceTranscribe] transcribing with whisper', { whisperExe, modelPath })
      const { stdout } = await execFileAsync(whisperExe, [
        '-m', modelPath,
        '-f', outputPath,
        '-l', 'zh',
        '-otxt'
      ], {
        maxBuffer: 10 * 1024 * 1024,
        cwd: tempDir
      })

      // Prefer the generated text file; fall back to parsing stdout.
      let transcript = ''
      const txtFile = transcriptFiles.find((candidate) => existsSync(candidate))
      if (txtFile) {
        transcript = readFileSync(txtFile, 'utf-8').trim()
      }
      if (!transcript) {
        transcript = this.extractTranscriptFromStdout(stdout)
      }

      console.info('[VoiceTranscribe] success', { transcript })
      return { success: true, transcript }
    } catch (error) {
      console.error('[VoiceTranscribe] failed', error)
      return { success: false, error: String(error) }
    } finally {
      for (const tempFile of [inputPath, outputPath, ...transcriptFiles]) {
        try { if (existsSync(tempFile)) unlinkSync(tempFile) } catch { }
      }
    }
  }

  /**
   * Pulls the spoken text out of whisper-cli's stdout.
   * Leading `[start --> end]` timestamps are stripped; lines that are empty, are still a
   * bracketed marker after stripping, or start with `whisper_` are dropped.
   */
  private extractTranscriptFromStdout(stdout: string): string {
    const timestampPrefix = /^\[[^\]]*-->[^\]]*\]\s*/
    return stdout
      .split('\n')
      .map((line) => line.trim().replace(timestampPrefix, ''))
      .filter((text) => text && !text.startsWith('[') && !text.startsWith('whisper_'))
      .join(' ')
      .trim()
  }

  /**
   * Streams `url` into `targetPath`, following up to `remainingRedirects` redirects.
   * Resolves once the file is fully written and closed; rejects on any HTTP, network
   * or file error, and when fewer bytes arrive than Content-Length announced.
   */
  private downloadToFile(
    url: string,
    targetPath: string,
    modelName: string,
    onProgress?: (progress: DownloadProgress) => void,
    remainingRedirects = 3
  ): Promise<void> {
    return new Promise((resolve, reject) => {
      const protocol = url.startsWith('https') ? https : http
      const request = protocol.get(url, (response) => {
        const statusCode = response.statusCode ?? 0
        const location = response.headers.location

        if ([301, 302, 303, 307, 308].includes(statusCode) && location) {
          // Discard the redirect body so the connection is released.
          response.resume()
          if (remainingRedirects <= 0) {
            reject(new Error('下载重定向次数过多'))
            return
          }
          let nextUrl: string
          try {
            // Location may be relative; resolve it against the URL that produced it.
            nextUrl = new URL(location, url).toString()
          } catch (error) {
            reject(error)
            return
          }
          this.downloadToFile(nextUrl, targetPath, modelName, onProgress, remainingRedirects - 1)
            .then(resolve, reject)
          return
        }

        if (statusCode !== 200) {
          response.resume()
          reject(new Error(`下载失败: ${response.statusCode}`))
          return
        }

        const totalBytes = Number(response.headers['content-length'] || 0) || undefined
        let downloadedBytes = 0

        const writer = createWriteStream(targetPath)
        const fail = (error: Error): void => {
          // Tear down both ends so neither the socket nor the file handle lingers.
          response.destroy()
          writer.destroy()
          reject(error)
        }

        response.on('data', (chunk: Buffer) => {
          downloadedBytes += chunk.length
          const percent = totalBytes ? (downloadedBytes / totalBytes) * 100 : undefined
          onProgress?.({ modelName, downloadedBytes, totalBytes, percent })
        })
        response.on('error', fail)
        writer.on('error', fail)
        writer.on('finish', () => {
          // Settle only after the descriptor is closed, so the caller can rename the file
          // straight away (an open handle blocks renaming on Windows).
          writer.close((closeError) => {
            if (closeError) {
              reject(closeError)
              return
            }
            if (totalBytes !== undefined && downloadedBytes !== totalBytes) {
              reject(new Error(`下载不完整: ${downloadedBytes}/${totalBytes}`))
              return
            }
            resolve()
          })
        })

        response.pipe(writer)
      })

      request.on('error', reject)
    })
  }
}
// Module-level instance exported for other modules to import and reuse.
export const voiceTranscribeService = new VoiceTranscribeService()

22
electron/types/whisper-node.d.ts vendored Normal file
View File

@@ -0,0 +1,22 @@
// Ambient type declarations for the untyped 'whisper-node' package.
// NOTE(review): field meanings below are inferred from their names; confirm against the package's own documentation.
declare module 'whisper-node' {
// One entry of the array the default export resolves to.
export type WhisperSegment = {
// presumably the segment start timestamp, as a string — TODO confirm format
start: string
// presumably the segment end timestamp, as a string — TODO confirm format
end: string
// presumably the recognised text of the segment
speech: string
}
// Options accepted as the second argument of the default export.
export type WhisperOptions = {
modelName?: string
modelPath?: string
// Nested options; presumably forwarded to the underlying whisper invocation — TODO confirm
whisperOptions?: {
language?: string
gen_file_txt?: boolean
gen_file_subtitle?: boolean
gen_file_vtt?: boolean
word_timestamps?: boolean
timestamp_size?: number
}
}
// Takes an audio file path plus optional settings and resolves to a list of segments.
export default function whisper(filePath: string, options?: WhisperOptions): Promise<WhisperSegment[]>
}