mirror of
https://github.com/hicccc77/WeFlow.git
synced 2026-03-22 07:32:29 +08:00
feat: 尝试增加一下聊天里面的语音转文字功能
This commit is contained in:
@@ -15,6 +15,7 @@ import { groupAnalyticsService } from './services/groupAnalyticsService'
|
||||
import { annualReportService } from './services/annualReportService'
|
||||
import { exportService, ExportOptions } from './services/exportService'
|
||||
import { KeyService } from './services/keyService'
|
||||
import { voiceTranscribeService } from './services/voiceTranscribeService'
|
||||
|
||||
|
||||
// 配置自动更新
|
||||
@@ -442,6 +443,10 @@ function registerIpcHandlers() {
|
||||
return chatService.getVoiceData(sessionId, msgId)
|
||||
})
|
||||
|
||||
// Transcribe one voice message to text; the service's result object is handed
// back to the renderer unchanged.
ipcMain.handle(
  'chat:getVoiceTranscript',
  async (_event, sessionId: string, msgId: string) => chatService.getVoiceTranscript(sessionId, msgId)
)
|
||||
|
||||
ipcMain.handle('chat:getMessageById', async (_, sessionId: string, localId: number) => {
|
||||
return chatService.getMessageById(sessionId, localId)
|
||||
})
|
||||
@@ -516,6 +521,16 @@ function registerIpcHandlers() {
|
||||
return { success: true }
|
||||
})
|
||||
|
||||
// Download a whisper model and stream progress events back to the renderer
// that requested it. Resolves with the download service's result object.
ipcMain.handle('whisper:downloadModel', async (event, payload: { modelName: string; downloadDir?: string; source?: string }) => {
  const sender = event.sender
  return voiceTranscribeService.downloadModel(payload, (progress) => {
    // The requesting window can be closed while a large model is still
    // downloading. Sending to a destroyed WebContents throws, and this callback
    // runs inside the download stream's data handler, so skip the event instead.
    if (sender.isDestroyed()) return
    sender.send('whisper:downloadProgress', progress)
  })
})
|
||||
|
||||
// Report whether the requested whisper model file is present on disk.
ipcMain.handle(
  'whisper:getModelStatus',
  async (_event, payload: { modelName: string; downloadDir?: string }) => voiceTranscribeService.getModelStatus(payload)
)
|
||||
|
||||
// 群聊分析相关
|
||||
ipcMain.handle('groupAnalytics:getGroupChats', async () => {
|
||||
return groupAnalyticsService.getGroupChats()
|
||||
|
||||
@@ -106,7 +106,8 @@ contextBridge.exposeInMainWorld('electronAPI', {
|
||||
close: () => ipcRenderer.invoke('chat:close'),
|
||||
getSessionDetail: (sessionId: string) => ipcRenderer.invoke('chat:getSessionDetail', sessionId),
|
||||
getImageData: (sessionId: string, msgId: string) => ipcRenderer.invoke('chat:getImageData', sessionId, msgId),
|
||||
getVoiceData: (sessionId: string, msgId: string) => ipcRenderer.invoke('chat:getVoiceData', sessionId, msgId)
|
||||
getVoiceData: (sessionId: string, msgId: string) => ipcRenderer.invoke('chat:getVoiceData', sessionId, msgId),
|
||||
getVoiceTranscript: (sessionId: string, msgId: string) => ipcRenderer.invoke('chat:getVoiceTranscript', sessionId, msgId)
|
||||
},
|
||||
|
||||
|
||||
@@ -174,5 +175,16 @@ contextBridge.exposeInMainWorld('electronAPI', {
|
||||
ipcRenderer.invoke('export:exportSessions', sessionIds, outputDir, options),
|
||||
exportSession: (sessionId: string, outputPath: string, options: any) =>
|
||||
ipcRenderer.invoke('export:exportSession', sessionId, outputPath, options)
|
||||
},
|
||||
|
||||
whisper: {
  // Ask the main process to download a whisper model.
  downloadModel: (payload: { modelName: string; downloadDir?: string; source?: string }) =>
    ipcRenderer.invoke('whisper:downloadModel', payload),
  // Ask the main process whether a whisper model file already exists.
  getModelStatus: (payload: { modelName: string; downloadDir?: string }) =>
    ipcRenderer.invoke('whisper:getModelStatus', payload),
  // Subscribe to download progress events. Returns an unsubscribe function that
  // removes only the listener registered by this call, so several subscribers
  // can coexist without one cleanup silencing the others.
  onDownloadProgress: (callback: (payload: { modelName: string; downloadedBytes: number; totalBytes?: number; percent?: number }) => void) => {
    const listener = (
      _event: unknown,
      payload: { modelName: string; downloadedBytes: number; totalBytes?: number; percent?: number }
    ) => callback(payload)
    ipcRenderer.on('whisper:downloadProgress', listener)
    return () => {
      ipcRenderer.removeListener('whisper:downloadProgress', listener)
    }
  }
}
|
||||
})
|
||||
|
||||
@@ -16,6 +16,7 @@ import { ConfigService } from './config'
|
||||
import { wcdbService } from './wcdbService'
|
||||
import { MessageCacheService } from './messageCacheService'
|
||||
import { ContactCacheService, ContactCacheEntry } from './contactCacheService'
|
||||
import { voiceTranscribeService } from './voiceTranscribeService'
|
||||
|
||||
type HardlinkState = {
|
||||
db: Database.Database
|
||||
@@ -83,6 +84,10 @@ class ChatService {
|
||||
private hardlinkCache = new Map<string, HardlinkState>()
|
||||
private readonly contactCacheService: ContactCacheService
|
||||
private readonly messageCacheService: MessageCacheService
|
||||
private voiceWavCache = new Map<string, Buffer>()
|
||||
private voiceTranscriptCache = new Map<string, string>()
|
||||
private voiceTranscriptPending = new Map<string, Promise<{ success: boolean; transcript?: string; error?: string }>>()
|
||||
private readonly voiceCacheMaxEntries = 50
|
||||
|
||||
constructor() {
|
||||
this.configService = new ConfigService()
|
||||
@@ -1738,6 +1743,9 @@ class ChatService {
|
||||
|
||||
if (includeMessages) {
|
||||
this.messageCacheService.clear()
|
||||
this.voiceWavCache.clear()
|
||||
this.voiceTranscriptCache.clear()
|
||||
this.voiceTranscriptPending.clear()
|
||||
}
|
||||
|
||||
for (const state of this.hardlinkCache.values()) {
|
||||
@@ -2263,6 +2271,8 @@ class ChatService {
|
||||
const pcmData = readFileSync(pcmFile)
|
||||
const wavHeader = this.createWavHeader(pcmData.length, 24000, 1) // 微信语音通常 24kHz
|
||||
const wavData = Buffer.concat([wavHeader, pcmData])
|
||||
const cacheKey = this.getVoiceCacheKey(sessionId, msgId)
|
||||
this.cacheVoiceWav(cacheKey, wavData)
|
||||
|
||||
return { success: true, data: wavData.toString('base64') }
|
||||
} finally {
|
||||
@@ -2276,6 +2286,45 @@ class ChatService {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Returns the text transcript of a voice message.
 *
 * Lookup order: the in-memory transcript cache, then an in-flight transcription
 * for the same message (concurrent callers share one task), then a fresh
 * transcription of the cached or newly decoded WAV data.
 *
 * @param sessionId Session the message belongs to.
 * @param msgId Identifier of the voice message.
 * @returns `{ success: true, transcript }` on success, otherwise
 *          `{ success: false, error }`. Never rejects.
 */
async getVoiceTranscript(sessionId: string, msgId: string): Promise<{ success: boolean; transcript?: string; error?: string }> {
  const cacheKey = this.getVoiceCacheKey(sessionId, msgId)

  // Compare against undefined: an empty transcript (e.g. a silent clip) is a
  // valid cached result and must not trigger another transcription run.
  const cached = this.voiceTranscriptCache.get(cacheKey)
  if (cached !== undefined) {
    return { success: true, transcript: cached }
  }

  const pending = this.voiceTranscriptPending.get(cacheKey)
  if (pending) {
    return pending
  }

  let task: Promise<{ success: boolean; transcript?: string; error?: string }> | undefined
  task = (async () => {
    try {
      let wavData = this.voiceWavCache.get(cacheKey)
      if (!wavData) {
        const voiceResult = await this.getVoiceData(sessionId, msgId)
        if (!voiceResult.success || !voiceResult.data) {
          return { success: false, error: voiceResult.error || '语音解码失败' }
        }
        wavData = Buffer.from(voiceResult.data, 'base64')
      }

      const result = await voiceTranscribeService.transcribeWavBuffer(wavData)
      if (result.success && typeof result.transcript === 'string') {
        this.cacheVoiceTranscript(cacheKey, result.transcript)
      }
      return result
    } catch (error) {
      return { success: false, error: String(error) }
    } finally {
      // Remove only our own entry: the pending map may have been cleared and
      // repopulated by a newer request for the same key while this task ran.
      if (this.voiceTranscriptPending.get(cacheKey) === task) {
        this.voiceTranscriptPending.delete(cacheKey)
      }
    }
  })()

  this.voiceTranscriptPending.set(cacheKey, task)
  return task
}
|
||||
|
||||
private createWavHeader(pcmLength: number, sampleRate: number = 24000, channels: number = 1): Buffer {
|
||||
const header = Buffer.alloc(44)
|
||||
header.write('RIFF', 0)
|
||||
@@ -2294,6 +2343,26 @@ class ChatService {
|
||||
return header
|
||||
}
|
||||
|
||||
/** Builds the key shared by the voice caches: session id and message id joined by a colon. */
private getVoiceCacheKey(sessionId: string, msgId: string): string {
  return [sessionId, msgId].join(':')
}
|
||||
|
||||
/**
 * Stores decoded WAV data under `cacheKey`, keeping at most
 * `voiceCacheMaxEntries` entries and evicting the oldest first.
 */
private cacheVoiceWav(cacheKey: string, wavData: Buffer): void {
  // Map iterates in insertion order and set() on an existing key keeps its old
  // position. Delete first so a re-cached entry counts as the newest instead of
  // being the next one evicted.
  this.voiceWavCache.delete(cacheKey)
  this.voiceWavCache.set(cacheKey, wavData)

  while (this.voiceWavCache.size > this.voiceCacheMaxEntries) {
    const oldestKey = this.voiceWavCache.keys().next().value
    if (oldestKey === undefined) break
    this.voiceWavCache.delete(oldestKey)
  }
}
|
||||
|
||||
/**
 * Stores a transcript under `cacheKey`, keeping at most
 * `voiceCacheMaxEntries` entries and evicting the oldest first.
 */
private cacheVoiceTranscript(cacheKey: string, transcript: string): void {
  // Map iterates in insertion order and set() on an existing key keeps its old
  // position. Delete first so a re-cached entry counts as the newest instead of
  // being the next one evicted.
  this.voiceTranscriptCache.delete(cacheKey)
  this.voiceTranscriptCache.set(cacheKey, transcript)

  while (this.voiceTranscriptCache.size > this.voiceCacheMaxEntries) {
    const oldestKey = this.voiceTranscriptCache.keys().next().value
    if (oldestKey === undefined) break
    this.voiceTranscriptCache.delete(oldestKey)
  }
}
|
||||
|
||||
async getMessageById(sessionId: string, localId: number): Promise<{ success: boolean; message?: Message; error?: string }> {
|
||||
try {
|
||||
console.info('[ChatService] getMessageById (SQL)', { sessionId, localId })
|
||||
|
||||
@@ -20,6 +20,9 @@ interface ConfigSchema {
|
||||
language: string
|
||||
logEnabled: boolean
|
||||
llmModelPath: string
|
||||
whisperModelName: string
|
||||
whisperModelDir: string
|
||||
whisperDownloadSource: string
|
||||
}
|
||||
|
||||
export class ConfigService {
|
||||
@@ -42,7 +45,10 @@ export class ConfigService {
|
||||
themeId: 'cloud-dancer',
|
||||
language: 'zh-CN',
|
||||
logEnabled: false,
|
||||
llmModelPath: ''
|
||||
llmModelPath: '',
|
||||
whisperModelName: 'base',
|
||||
whisperModelDir: '',
|
||||
whisperDownloadSource: 'tsinghua'
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
281
electron/services/voiceTranscribeService.ts
Normal file
281
electron/services/voiceTranscribeService.ts
Normal file
@@ -0,0 +1,281 @@
|
||||
import { app } from 'electron'
import {
  createWriteStream,
  existsSync,
  mkdirSync,
  readFileSync,
  renameSync,
  statSync,
  unlinkSync,
  writeFileSync
} from 'fs'
import { join, dirname } from 'path'
import { promisify } from 'util'
import { execFile, spawnSync } from 'child_process'
import { pipeline } from 'stream'
import * as https from 'https'
import * as http from 'http'
import { ConfigService } from './config'
|
||||
|
||||
const execFileAsync = promisify(execFile)
|
||||
|
||||
/** Static description of one downloadable whisper model file. */
interface WhisperModelInfo {
  /** Model identifier; matches the entry's key in WHISPER_MODELS. */
  name: string
  /** File name of the model, appended to both the source URL and the local model directory. */
  fileName: string
  /** Human-readable size. */
  sizeLabel: string
  /** Approximate size in bytes. */
  sizeBytes?: number
}

/** Progress snapshot passed to download progress callbacks. */
interface DownloadProgress {
  modelName: string
  downloadedBytes: number
  /** Taken from the Content-Length header; absent when the server does not send one. */
  totalBytes?: number
  /** 0-100, only present when totalBytes is known. */
  percent?: number
}

/** Models that can be selected and downloaded, keyed by model name. */
const WHISPER_MODELS: Record<string, WhisperModelInfo> = {
  tiny: {
    name: 'tiny',
    fileName: 'ggml-tiny.bin',
    sizeLabel: '75 MB',
    sizeBytes: 75_000_000
  },
  base: {
    name: 'base',
    fileName: 'ggml-base.bin',
    sizeLabel: '142 MB',
    sizeBytes: 142_000_000
  },
  small: {
    name: 'small',
    fileName: 'ggml-small.bin',
    sizeLabel: '466 MB',
    sizeBytes: 466_000_000
  },
  medium: {
    name: 'medium',
    fileName: 'ggml-medium.bin',
    sizeLabel: '1.5 GB',
    sizeBytes: 1_500_000_000
  },
  'large-v3': {
    name: 'large-v3',
    fileName: 'ggml-large-v3.bin',
    sizeLabel: '2.9 GB',
    sizeBytes: 2_900_000_000
  }
}

/**
 * Base URLs for model downloads, keyed by source id.
 * Note: the `tsinghua` id currently points at hf-mirror.com.
 */
const WHISPER_SOURCES: Record<string, string> = {
  official: 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main',
  tsinghua: 'https://hf-mirror.com/ggerganov/whisper.cpp/resolve/main'
}
|
||||
|
||||
/**
 * Locates the ffmpeg binary shipped by the `ffmpeg-static` package.
 *
 * Tries, in order: the path exported by `ffmpeg-static`, the package directory
 * under the current working directory's `node_modules`, and (for packaged
 * builds) the `app.asar.unpacked` copy under the resources directory.
 *
 * @returns Absolute path of the first candidate that exists, or `null` when
 *          none exists or the lookup throws.
 */
function getStaticFfmpegPath(): string | null {
  try {
    // eslint-disable-next-line @typescript-eslint/no-var-requires
    const ffmpegStatic: unknown = require('ffmpeg-static')
    if (typeof ffmpegStatic === 'string' && existsSync(ffmpegStatic)) {
      return ffmpegStatic
    }

    // Only Windows binaries carry the .exe suffix; a hard-coded 'ffmpeg.exe'
    // made these fallbacks useless on other platforms.
    const binaryName = process.platform === 'win32' ? 'ffmpeg.exe' : 'ffmpeg'

    const candidates = [join(process.cwd(), 'node_modules', 'ffmpeg-static', binaryName)]
    if (app.isPackaged) {
      candidates.push(
        join(process.resourcesPath, 'app.asar.unpacked', 'node_modules', 'ffmpeg-static', binaryName)
      )
    }

    return candidates.find((candidate) => existsSync(candidate)) ?? null
  } catch {
    // Best effort: callers fall back to whatever "ffmpeg" resolves to on PATH.
    return null
  }
}
|
||||
|
||||
/**
 * Downloads whisper model files and transcribes WAV audio by shelling out to
 * ffmpeg (resampling) and the bundled whisper-cli executable.
 */
export class VoiceTranscribeService {
  private configService = new ConfigService()
  /** In-flight downloads keyed by model name + target path, so concurrent requests share one transfer. */
  private downloadTasks = new Map<string, Promise<{ success: boolean; path?: string; error?: string }>>()

  /** Looks up a model by name; returns null for unknown names. */
  private resolveModelInfo(modelName: string): WhisperModelInfo | null {
    // Own-property check: the name comes from the renderer, and a plain lookup
    // would resolve inherited keys such as "constructor" to a truthy value.
    if (!Object.prototype.hasOwnProperty.call(WHISPER_MODELS, modelName)) return null
    return WHISPER_MODELS[modelName] ?? null
  }

  /** Directory holding model files: explicit override, then config, then `<userData>/models/whisper`. */
  private resolveModelDir(overrideDir?: string): string {
    const configured = overrideDir || this.configService.get('whisperModelDir')
    if (configured) return configured
    return join(app.getPath('userData'), 'models', 'whisper')
  }

  /** Full path of the model file, or null for an unknown model name. */
  private resolveModelPath(modelName: string, overrideDir?: string): string | null {
    const info = this.resolveModelInfo(modelName)
    if (!info) return null
    return join(this.resolveModelDir(overrideDir), info.fileName)
  }

  /** Base download URL: explicit override, then config; unknown ids fall back to the official source. */
  private resolveSourceUrl(overrideSource?: string): string {
    const configured = overrideSource || this.configService.get('whisperDownloadSource')
    if (configured && Object.prototype.hasOwnProperty.call(WHISPER_SOURCES, configured)) {
      return WHISPER_SOURCES[configured]
    }
    return WHISPER_SOURCES.official
  }

  /**
   * Reports whether a model file exists on disk and, if so, its size.
   *
   * @returns `success: false` for an unknown model name or when the file
   *          cannot be inspected; otherwise `exists` and `path` are set.
   */
  async getModelStatus(payload: { modelName: string; downloadDir?: string }): Promise<{
    success: boolean
    exists?: boolean
    path?: string
    sizeBytes?: number
    error?: string
  }> {
    const modelPath = this.resolveModelPath(payload.modelName, payload.downloadDir)
    if (!modelPath) {
      return { success: false, error: '未知模型名称' }
    }
    if (!existsSync(modelPath)) {
      return { success: true, exists: false, path: modelPath }
    }
    try {
      const sizeBytes = statSync(modelPath).size
      return { success: true, exists: true, path: modelPath, sizeBytes }
    } catch (error) {
      // The file can disappear or be unreadable between the two checks.
      return { success: false, path: modelPath, error: String(error) }
    }
  }

  /**
   * Downloads a model file unless it already exists.
   *
   * Concurrent calls for the same model and target path share one download;
   * only the first caller's `onProgress` receives events.
   *
   * @param payload Model name plus optional target directory and source id.
   * @param onProgress Optional progress callback.
   * @returns `{ success: true, path }` or `{ success: false, error }`. Never rejects.
   */
  async downloadModel(
    payload: { modelName: string; downloadDir?: string; source?: string },
    onProgress?: (progress: DownloadProgress) => void
  ): Promise<{ success: boolean; path?: string; error?: string }> {
    const info = this.resolveModelInfo(payload.modelName)
    if (!info) {
      return { success: false, error: '未知模型名称' }
    }

    const modelPath = this.resolveModelPath(payload.modelName, payload.downloadDir)
    if (!modelPath) {
      return { success: false, error: '模型路径生成失败' }
    }

    if (existsSync(modelPath)) {
      return { success: true, path: modelPath }
    }

    const cacheKey = `${payload.modelName}:${modelPath}`
    const pending = this.downloadTasks.get(cacheKey)
    if (pending) return pending

    // Download next to the final location and rename once complete. Writing
    // straight to modelPath would leave a truncated file behind if the app
    // exits mid-download, and the existsSync check above would then report
    // that file as a usable model.
    const tempPath = `${modelPath}.download`

    const task = (async () => {
      try {
        const targetDir = this.resolveModelDir(payload.downloadDir)
        if (!existsSync(targetDir)) {
          mkdirSync(targetDir, { recursive: true })
        }

        const baseUrl = this.resolveSourceUrl(payload.source)
        const url = `${baseUrl}/${info.fileName}`
        await this.downloadToFile(url, tempPath, payload.modelName, onProgress)
        renameSync(tempPath, modelPath)
        return { success: true, path: modelPath }
      } catch (error) {
        this.removeQuietly(tempPath)
        return { success: false, error: String(error) }
      } finally {
        this.downloadTasks.delete(cacheKey)
      }
    })()

    this.downloadTasks.set(cacheKey, task)
    return task
  }

  /**
   * Transcribes a WAV buffer with the configured whisper model.
   *
   * The audio is written to a temp file, resampled to 16 kHz mono with ffmpeg,
   * then passed to whisper-cli. Temp files are removed afterwards.
   *
   * @returns `{ success: true, transcript }` or `{ success: false, error }`. Never rejects.
   */
  async transcribeWavBuffer(wavData: Buffer): Promise<{ success: boolean; transcript?: string; error?: string }> {
    const modelName = this.configService.get('whisperModelName') || 'base'
    const modelPath = this.resolveModelPath(modelName)
    console.info('[VoiceTranscribe] check model', { modelName, modelPath, exists: modelPath ? existsSync(modelPath) : false })
    if (!modelPath || !existsSync(modelPath)) {
      return { success: false, error: '未下载语音模型,请在设置中下载' }
    }

    // Use the bundled, precompiled whisper-cli executable.
    const resourcesPath = app.isPackaged
      ? join(process.resourcesPath, 'resources')
      : join(app.getAppPath(), 'resources')
    const exeSuffix = process.platform === 'win32' ? '.exe' : ''
    const whisperExe = join(resourcesPath, `whisper-cli${exeSuffix}`)

    if (!existsSync(whisperExe)) {
      return { success: false, error: '找不到语音转写程序,请重新安装应用' }
    }

    const ffmpegPath = getStaticFfmpegPath() || 'ffmpeg'
    console.info('[VoiceTranscribe] ffmpeg path', ffmpegPath)

    const tempDir = app.getPath('temp')
    const fileToken = `${Date.now()}_${Math.random().toString(16).slice(2)}`
    const inputPath = join(tempDir, `weflow_voice_${fileToken}.wav`)
    const outputBase = join(tempDir, `weflow_voice_${fileToken}_16k`)
    const outputPath = `${outputBase}.wav`
    // "-of" pins the text output to `<outputBase>.txt`. Without it whisper-cli
    // names the file after the input (`<input>.wav.txt`); that name is also
    // checked and cleaned up in case the bundled build ignores "-of".
    const txtCandidates = [`${outputBase}.txt`, `${outputPath}.txt`]

    try {
      writeFileSync(inputPath, wavData)
      console.info('[VoiceTranscribe] converting to 16kHz', { inputPath, outputPath })
      await execFileAsync(ffmpegPath, ['-y', '-i', inputPath, '-ar', '16000', '-ac', '1', outputPath])

      console.info('[VoiceTranscribe] transcribing with whisper', { whisperExe, modelPath })
      const { stdout } = await execFileAsync(whisperExe, [
        '-m', modelPath,
        '-f', outputPath,
        '-l', 'zh',
        '-otxt',
        '-of', outputBase
      ], {
        maxBuffer: 10 * 1024 * 1024,
        cwd: tempDir
      })

      // Prefer the generated text file; fall back to parsing stdout.
      const txtFile = txtCandidates.find((candidate) => existsSync(candidate))
      const transcript = txtFile
        ? readFileSync(txtFile, 'utf-8').trim()
        : this.extractTranscriptFromStdout(String(stdout))

      console.info('[VoiceTranscribe] success', { transcript })
      return { success: true, transcript }
    } catch (error) {
      console.error('[VoiceTranscribe] failed', error)
      return { success: false, error: String(error) }
    } finally {
      for (const tempFile of [inputPath, outputPath, ...txtCandidates]) {
        this.removeQuietly(tempFile)
      }
    }
  }

  /**
   * Extracts the spoken text from whisper-cli stdout.
   *
   * A leading `[hh:mm:ss.mmm --> hh:mm:ss.mmm]` timestamp is stripped from each
   * line; blank lines, remaining bracket-only lines and `whisper_` log lines
   * are dropped. The rest is joined with single spaces.
   */
  private extractTranscriptFromStdout(stdout: string): string {
    const timestampPrefix = /^\[\d{2}:\d{2}:\d{2}[.,]\d{3}\s*-->\s*\d{2}:\d{2}:\d{2}[.,]\d{3}\]\s*/
    return stdout
      .split(/\r?\n/)
      .map((line) => line.trim().replace(timestampPrefix, ''))
      .filter((text) => text !== '' && !text.startsWith('[') && !text.startsWith('whisper_'))
      .join(' ')
      .trim()
  }

  /** Resolves a redirect `Location` header, which may be relative, against the URL that produced it. */
  private resolveRedirectUrl(location: string, currentUrl: string): string {
    return new URL(location, currentUrl).toString()
  }

  /** Deletes a file if it exists; failures are ignored because this is only temp-file cleanup. */
  private removeQuietly(filePath: string): void {
    try {
      if (existsSync(filePath)) unlinkSync(filePath)
    } catch {
      // Nothing useful to do: the file is a leftover temp artifact.
    }
  }

  /**
   * Streams `url` into `targetPath`, following up to `remainingRedirects`
   * redirects and reporting progress per received chunk.
   *
   * Rejects on network errors, non-200 responses, too many redirects, write
   * errors, a connection closed before the body ended, or a body shorter or
   * longer than the advertised Content-Length.
   */
  private downloadToFile(
    url: string,
    targetPath: string,
    modelName: string,
    onProgress?: (progress: DownloadProgress) => void,
    remainingRedirects = 3
  ): Promise<void> {
    return new Promise((resolve, reject) => {
      const protocol = url.startsWith('https:') ? https : http
      const request = protocol.get(url, (response) => {
        const statusCode = response.statusCode ?? 0
        const location = response.headers.location

        if ([301, 302, 303, 307, 308].includes(statusCode) && location) {
          // Discard the redirect body so the underlying socket is released.
          response.resume()
          if (remainingRedirects <= 0) {
            reject(new Error('下载重定向次数过多'))
            return
          }
          let nextUrl: string
          try {
            nextUrl = this.resolveRedirectUrl(location, url)
          } catch (error) {
            reject(error)
            return
          }
          this.downloadToFile(nextUrl, targetPath, modelName, onProgress, remainingRedirects - 1)
            .then(resolve, reject)
          return
        }

        if (statusCode !== 200) {
          response.resume()
          reject(new Error(`下载失败: ${response.statusCode}`))
          return
        }

        const totalBytes = Number(response.headers['content-length'] || 0) || undefined
        let downloadedBytes = 0

        response.on('data', (chunk: Buffer) => {
          downloadedBytes += chunk.length
          const percent = totalBytes ? (downloadedBytes / totalBytes) * 100 : undefined
          try {
            onProgress?.({ modelName, downloadedBytes, totalBytes, percent })
          } catch (error) {
            // Progress reporting is best-effort; a failing listener must not
            // abort the transfer or crash the process from a stream callback.
            console.warn('[VoiceTranscribe] progress callback failed', error)
          }
        })

        // pipeline() destroys both streams on failure and reports a premature
        // close, so the promise always settles.
        pipeline(response, createWriteStream(targetPath), (error) => {
          if (error) {
            reject(error)
            return
          }
          if (totalBytes !== undefined && downloadedBytes !== totalBytes) {
            reject(new Error(`下载不完整: ${downloadedBytes}/${totalBytes}`))
            return
          }
          resolve()
        })
      })

      request.on('error', reject)
    })
  }
}
|
||||
|
||||
export const voiceTranscribeService = new VoiceTranscribeService()
|
||||
22
electron/types/whisper-node.d.ts
vendored
Normal file
22
electron/types/whisper-node.d.ts
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
// Ambient typings for the untyped 'whisper-node' package.
// NOTE(review): field meanings are not visible from this declaration alone;
// confirm against the package's own documentation before relying on them.
declare module 'whisper-node' {
  /** One element of the array the default export resolves to. */
  export type WhisperSegment = {
    start: string
    end: string
    speech: string
  }

  /** Shape of the nested `whisperOptions` object. */
  export type WhisperCliOptions = {
    language?: string
    gen_file_txt?: boolean
    gen_file_subtitle?: boolean
    gen_file_vtt?: boolean
    word_timestamps?: boolean
    timestamp_size?: number
  }

  /** Options accepted as the second argument of the default export. */
  export type WhisperOptions = {
    modelName?: string
    modelPath?: string
    whisperOptions?: WhisperCliOptions
  }

  /** Takes a file path and optional options; resolves to a list of segments. */
  export default function whisper(filePath: string, options?: WhisperOptions): Promise<WhisperSegment[]>
}
|
||||
Reference in New Issue
Block a user