mirror of
https://github.com/zhayujie/chatgpt-on-wechat.git
synced 2026-05-13 15:51:27 +08:00
Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| cec674cb47 | |||
| c5a90823fa | |||
| 18d82bc1f0 | |||
| a68af990ea | |||
| e71c600d10 | |||
| d7f1f7182c | |||
| dfb2e460b4 | |||
| 5badef8ba9 | |||
| 18aa5ce75c | |||
| 1545a9f262 |
@@ -90,6 +90,13 @@ pip3 install -r requirements.txt
|
||||
|
||||
参考[#415](https://github.com/zhayujie/chatgpt-on-wechat/issues/415)
|
||||
|
||||
使用`azure`语音功能需安装依赖:
|
||||
```bash
|
||||
pip3 install azure-cognitiveservices-speech
|
||||
```
|
||||
> 目前默认发布的镜像和`railway`部署,都基于`alpine`,无法安装`azure`的依赖。若有需求请自行基于[`debian`](https://github.com/zhayujie/chatgpt-on-wechat/blob/master/docker/Dockerfile.debian.latest)打包。
|
||||
参考[文档](https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/quickstarts/setup-platform?pivots=programming-language-python&tabs=linux%2Cubuntu%2Cdotnet%2Cjre%2Cmaven%2Cnodejs%2Cmac%2Cpypi)
|
||||
|
||||
## 配置
|
||||
|
||||
配置文件的模板在根目录的`config-template.json`中,需复制该模板创建最终生效的 `config.json` 文件:
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
# encoding:utf-8
|
||||
|
||||
import os
|
||||
from config import conf, load_config
|
||||
from channel import channel_factory
|
||||
from common.log import logger
|
||||
@@ -13,6 +14,10 @@ def run():
|
||||
|
||||
# create channel
|
||||
channel_name=conf().get('channel_type', 'wx')
|
||||
if channel_name == 'wxy':
|
||||
os.environ['WECHATY_LOG']="warn"
|
||||
# os.environ['WECHATY_PUPPET_SERVICE_ENDPOINT'] = '127.0.0.1:9001'
|
||||
|
||||
channel = channel_factory.create_channel(channel_name)
|
||||
if channel_name in ['wx','wxy']:
|
||||
PluginManager().load_plugins()
|
||||
|
||||
@@ -98,11 +98,12 @@ class ChatChannel(Channel):
|
||||
else:
|
||||
context.type = ContextType.TEXT
|
||||
context.content = content
|
||||
if 'desire_rtype' not in context and conf().get('always_reply_voice'):
|
||||
context['desire_rtype'] = ReplyType.VOICE
|
||||
elif context.type == ContextType.VOICE:
|
||||
if 'desire_rtype' not in context and conf().get('voice_reply_voice'):
|
||||
context['desire_rtype'] = ReplyType.VOICE
|
||||
|
||||
|
||||
return context
|
||||
|
||||
# 处理消息 TODO: 如果wechaty解耦,此处逻辑可以放置到父类
|
||||
@@ -194,14 +195,17 @@ class ChatChannel(Channel):
|
||||
'channel': self, 'context': context, 'reply': reply}))
|
||||
reply = e_context['reply']
|
||||
if not e_context.is_pass() and reply and reply.type:
|
||||
logger.debug('[WX] ready to send reply: {} to {}'.format(reply, context))
|
||||
logger.debug('[WX] ready to send reply: {}, context: {}'.format(reply, context))
|
||||
self._send(reply, context)
|
||||
|
||||
def _send(self, reply: Reply, context: Context, retry_cnt = 0):
|
||||
try:
|
||||
self.send(reply, context)
|
||||
except Exception as e:
|
||||
logger.error('[WX] sendMsg error: {}'.format(e))
|
||||
logger.error('[WX] sendMsg error: {}'.format(str(e)))
|
||||
if isinstance(e, NotImplementedError):
|
||||
return
|
||||
logger.exception(e)
|
||||
if retry_cnt < 2:
|
||||
time.sleep(3+3*retry_cnt)
|
||||
self._send(reply, context, retry_cnt+1)
|
||||
|
||||
@@ -64,6 +64,23 @@ def _check(func):
|
||||
return func(self, cmsg)
|
||||
return wrapper
|
||||
|
||||
#可用的二维码生成接口
|
||||
#https://api.qrserver.com/v1/create-qr-code/?size=400×400&data=https://www.abc.com
|
||||
#https://api.isoyu.com/qr/?m=1&e=L&p=20&url=https://www.abc.com
|
||||
def qrCallback(uuid,status,qrcode):
    """
    QR-code callback handed to ``itchat.auto_login(qrCallback=...)``.

    When ``status`` is the string ``'0'``, print a link to an online QR-code
    image and render the login QR code as ASCII art in the terminal.
    Any other status is ignored.

    :param uuid: login session id, embedded into the WeChat login URL
    :param status: status string supplied by the caller; only ``'0'`` produces output
    :param qrcode: not used here; the QR code is rebuilt locally from the login URL.
                   The name is kept because it is part of the callback signature.
    :return: None
    """
    # logger.debug("qrCallback: {} {}".format(uuid,status))
    if status != '0':
        return

    # Import under an alias so the module does not rebind the `qrcode` argument.
    import qrcode as qrcode_lib
    url = f"https://login.weixin.qq.com/l/{uuid}"

    qr_api="https://api.isoyu.com/qr/?m=1&e=L&p=20&url={}".format(url)
    print("You can also scan QRCode in the website below:\n{}".format(qr_api))

    qr = qrcode_lib.QRCode(border=1)
    qr.add_data(url)
    qr.make(fit=True)
    qr.print_ascii(invert=True)
|
||||
|
||||
@singleton
|
||||
class WechatChannel(ChatChannel):
|
||||
def __init__(self):
|
||||
@@ -76,13 +93,13 @@ class WechatChannel(ChatChannel):
|
||||
# login by scan QRCode
|
||||
hotReload = conf().get('hot_reload', False)
|
||||
try:
|
||||
itchat.auto_login(enableCmdQR=2, hotReload=hotReload)
|
||||
itchat.auto_login(enableCmdQR=2, hotReload=hotReload, qrCallback=qrCallback)
|
||||
except Exception as e:
|
||||
if hotReload:
|
||||
logger.error("Hot reload failed, try to login without hot reload")
|
||||
itchat.logout()
|
||||
os.remove("itchat.pkl")
|
||||
itchat.auto_login(enableCmdQR=2, hotReload=hotReload)
|
||||
itchat.auto_login(enableCmdQR=2, hotReload=hotReload, qrCallback=qrCallback)
|
||||
else:
|
||||
raise e
|
||||
self.user_id = itchat.instance.storageClass.userName
|
||||
|
||||
@@ -20,7 +20,7 @@ from channel.wechat.wechaty_message import WechatyMessage
|
||||
from common.log import logger
|
||||
from config import conf
|
||||
try:
|
||||
from voice.audio_convert import mp3_to_sil
|
||||
from voice.audio_convert import any_to_sil
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
@@ -35,14 +35,12 @@ class WechatyChannel(ChatChannel):
|
||||
pass
|
||||
|
||||
def startup(self):
|
||||
asyncio.run(self.main())
|
||||
|
||||
async def main(self):
|
||||
config = conf()
|
||||
token = config.get('wechaty_puppet_service_token')
|
||||
os.environ['WECHATY_PUPPET_SERVICE_TOKEN'] = token
|
||||
os.environ['WECHATY_LOG']="warn"
|
||||
# os.environ['WECHATY_PUPPET_SERVICE_ENDPOINT'] = '127.0.0.1:9001'
|
||||
asyncio.run(self.main())
|
||||
|
||||
async def main(self):
|
||||
self.bot = Wechaty()
|
||||
self.bot.on('login', self.on_login)
|
||||
self.bot.on('message', self.on_message)
|
||||
@@ -72,18 +70,9 @@ class WechatyChannel(ChatChannel):
|
||||
logger.info('[WX] sendMsg={}, receiver={}'.format(reply, receiver))
|
||||
elif reply.type == ReplyType.VOICE:
|
||||
voiceLength = None
|
||||
if reply.content.endswith('.mp3'):
|
||||
mp3_file = reply.content
|
||||
sil_file = os.path.splitext(mp3_file)[0] + '.sil'
|
||||
voiceLength = mp3_to_sil(mp3_file, sil_file)
|
||||
try:
|
||||
os.remove(mp3_file)
|
||||
except Exception as e:
|
||||
pass
|
||||
elif reply.content.endswith('.sil'):
|
||||
sil_file = reply.content
|
||||
else:
|
||||
raise Exception('voice file must be mp3 or sil format')
|
||||
file_path = reply.content
|
||||
sil_file = os.path.splitext(file_path)[0] + '.sil'
|
||||
voiceLength = int(any_to_sil(file_path, sil_file))
|
||||
# 发送语音
|
||||
t = int(time.time())
|
||||
msg = FileBox.from_file(sil_file, name=str(t) + '.sil')
|
||||
@@ -91,6 +80,7 @@ class WechatyChannel(ChatChannel):
|
||||
msg.metadata['voiceLength'] = voiceLength
|
||||
asyncio.run_coroutine_threadsafe(receiver.say(msg),loop).result()
|
||||
try:
|
||||
os.remove(file_path)
|
||||
os.remove(sil_file)
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
@@ -47,16 +47,21 @@ available_setting = {
|
||||
"speech_recognition": False, # 是否开启语音识别
|
||||
"group_speech_recognition": False, # 是否开启群组语音识别
|
||||
"voice_reply_voice": False, # 是否使用语音回复语音,需要设置对应语音合成引擎的api key
|
||||
"voice_to_text": "openai", # 语音识别引擎,支持openai,google
|
||||
"text_to_voice": "baidu", # 语音合成引擎,支持baidu,google,pytts(offline)
|
||||
"always_reply_voice": False, # 是否一直使用语音回复
|
||||
"voice_to_text": "openai", # 语音识别引擎,支持openai,google,azure
|
||||
"text_to_voice": "baidu", # 语音合成引擎,支持baidu,google,pytts(offline),azure
|
||||
|
||||
# baidu api的配置, 使用百度语音识别和语音合成时需要
|
||||
# baidu 语音api配置, 使用百度语音识别和语音合成时需要
|
||||
"baidu_app_id": "",
|
||||
"baidu_api_key": "",
|
||||
"baidu_secret_key": "",
|
||||
# 1536普通话(支持简单的英文识别) 1737英语 1637粤语 1837四川话 1936普通话远场
|
||||
"baidu_dev_pid": "1536",
|
||||
|
||||
# azure 语音api配置, 使用azure语音识别和语音合成时需要
|
||||
"azure_voice_api_key": "",
|
||||
"azure_voice_region": "japaneast",
|
||||
|
||||
# 服务时间限制,目前支持itchat
|
||||
"chat_time_module": False, # 是否开启服务时间限制
|
||||
"chat_start_time": "00:00", # 服务开始时间
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
# Debian-based image (python:3.10-slim) that additionally installs ffmpeg,
# espeak and the Azure speech SDK.
FROM python:3.10-slim

LABEL maintainer="foo@bar.com"
ARG TZ='Asia/Shanghai'

ARG CHATGPT_ON_WECHAT_VER

ENV BUILD_PREFIX=/app

ADD . ${BUILD_PREFIX}

# Install system packages and Python dependencies in a single layer.
# The apt package lists are removed afterwards and pip runs without its cache
# so neither ends up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends bash \
        ffmpeg espeak \
    && rm -rf /var/lib/apt/lists/* \
    && cd ${BUILD_PREFIX} \
    && cp config-template.json config.json \
    && /usr/local/bin/python -m pip install --no-cache --upgrade pip \
    && pip install --no-cache -r requirements.txt \
    && pip install --no-cache azure-cognitiveservices-speech

WORKDIR ${BUILD_PREFIX}

ADD docker/entrypoint.sh /entrypoint.sh

# Make the entrypoint executable and hand the application tree to an
# unprivileged user.
RUN chmod +x /entrypoint.sh \
    && groupadd -r noroot \
    && useradd -r -g noroot -s /bin/bash -d /home/noroot noroot \
    && chown -R noroot:noroot ${BUILD_PREFIX}

USER noroot

# Run the copy that was installed and chmod-ed above, not the relative
# docker/entrypoint.sh inside the source tree.
ENTRYPOINT ["/entrypoint.sh"]
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
|
||||
cd .. && docker build -f Dockerfile \
|
||||
cd .. && docker build -f docker/Dockerfile.latest \
|
||||
-t zhayujie/chatgpt-on-wechat .
|
||||
+2
-2
@@ -1,6 +1,6 @@
|
||||
## 插件化初衷
|
||||
|
||||
之前未插件化的代码耦合程度高,如果要定制一些个性化功能(如流量控制、接入`NovelAI`画图平台等),需要了解代码主体,避免影响到其他的功能。在实现多个功能后,不但无法调整功能的优先级顺序,功能的配置项也会变得非常混乱。
|
||||
之前未插件化的代码耦合程度高,如果要定制一些个性化功能(如流量控制、接入`NovelAI`画图平台等),需要了解代码主体,避免影响到其他的功能。多个功能同时存在时,无法调整功能的优先级顺序,功能配置项也非常混乱。
|
||||
|
||||
此时插件化应声而出。
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
- [x] 插件化能够自由开关和调整优先级。
|
||||
- [x] 每个插件可在插件文件夹内维护独立的配置文件,方便代码的测试和调试,可以在独立的仓库开发插件。
|
||||
|
||||
PS: 插件目前仅支持`itchat`
|
||||
PS: 插件目前支持`itchat`和`wechaty`
|
||||
|
||||
## 插件化实现
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ PyQRCode>=1.2.1
|
||||
pysilk>=0.0.1
|
||||
pysilk_mod>=1.6.0
|
||||
pyttsx3>=2.90
|
||||
qrcode>=7.4.2
|
||||
requests>=2.28.2
|
||||
webuiapi>=0.6.2
|
||||
wechaty>=0.10.7
|
||||
|
||||
+45
-16
@@ -1,7 +1,23 @@
|
||||
import shutil
|
||||
import wave
|
||||
import pysilk
|
||||
from pydub import AudioSegment
|
||||
|
||||
sil_supports=[8000, 12000, 16000, 24000, 32000, 44100, 48000] # sample rates (Hz) supported by the silk conversion

def find_closest_sil_supports(sample_rate):
    """
    Return the supported sample rate closest to ``sample_rate``.

    An exact match is returned unchanged. On a tie, the lower rate wins
    because it comes first in ``sil_supports``.

    :param sample_rate: sample rate in Hz
    :return: the entry of ``sil_supports`` with the smallest absolute difference
    """
    # min() keeps the first minimal element, matching the tie-breaking of a
    # strict "<" scan, and needs no artificial "infinite" starting distance.
    return min(sil_supports, key=lambda rate: abs(rate - sample_rate))
|
||||
|
||||
def get_pcm_from_wav(wav_path):
|
||||
"""
|
||||
@@ -13,6 +29,30 @@ def get_pcm_from_wav(wav_path):
|
||||
wav = wave.open(wav_path, "rb")
|
||||
return wav.readframes(wav.getnframes())
|
||||
|
||||
def any_to_wav(any_path, wav_path):
    """
    Convert an audio file of any format into a wav file.

    - ``.wav`` input is copied to ``wav_path`` as-is.
    - ``.sil`` / ``.silk`` / ``.slk`` input is delegated to ``sil_to_wav``.
    - Anything else is decoded with ``AudioSegment.from_file`` and exported as wav.

    :param any_path: path of the source audio file
    :param wav_path: path of the wav file to write
    :return: None for the wav and generic branches; whatever ``sil_to_wav``
             returns for silk input
    """
    if any_path.endswith('.wav'):
        try:
            shutil.copy2(any_path, wav_path)
        except shutil.SameFileError:
            # Source and destination are the same file: it is already in place.
            pass
        return
    if any_path.endswith(('.sil', '.silk', '.slk')):
        return sil_to_wav(any_path, wav_path)
    audio = AudioSegment.from_file(any_path)
    audio.export(wav_path, format="wav")
|
||||
|
||||
def any_to_sil(any_path, sil_path):
    """
    Convert an audio file of any supported format into a sil (silk) file.

    - ``.sil`` / ``.silk`` / ``.slk`` input is copied to ``sil_path`` as-is and
      the fixed value 10000 is returned (NOTE(review): this looks like a
      placeholder duration in milliseconds, since the real length is not
      measured here - confirm).
    - ``.wav`` input is delegated to ``pcm_to_sil``.
    - ``.mp3`` input is delegated to ``mp3_to_sil``.

    :param any_path: path of the source audio file
    :param sil_path: path of the sil file to write
    :return: 10000 for silk input, otherwise the value returned by the
             delegated converter
    :raises NotImplementedError: if the file extension is not supported
    """
    if any_path.endswith(('.sil', '.silk', '.slk')):
        try:
            shutil.copy2(any_path, sil_path)
        except shutil.SameFileError:
            # Callers may derive sil_path from any_path, so a ".sil" input can
            # map onto itself; the file is already where it needs to be.
            pass
        return 10000
    if any_path.endswith('.wav'):
        return pcm_to_sil(any_path, sil_path)
    if any_path.endswith('.mp3'):
        return mp3_to_sil(any_path, sil_path)
    raise NotImplementedError("Not support file type: {}".format(any_path))
|
||||
|
||||
def mp3_to_wav(mp3_path, wav_path):
|
||||
"""
|
||||
@@ -21,26 +61,16 @@ def mp3_to_wav(mp3_path, wav_path):
|
||||
audio = AudioSegment.from_mp3(mp3_path)
|
||||
audio.export(wav_path, format="wav")
|
||||
|
||||
def any_to_wav(any_path, wav_path):
|
||||
"""
|
||||
把任意格式转成wav文件
|
||||
"""
|
||||
if any_path.endswith('.wav'):
|
||||
return
|
||||
if any_path.endswith('.sil') or any_path.endswith('.silk') or any_path.endswith('.slk'):
|
||||
return sil_to_wav(any_path, wav_path)
|
||||
audio = AudioSegment.from_file(any_path)
|
||||
audio.export(wav_path, format="wav")
|
||||
|
||||
def pcm_to_silk(pcm_path, silk_path):
|
||||
def pcm_to_sil(pcm_path, silk_path):
|
||||
"""
|
||||
wav 文件转成 silk
|
||||
return 声音长度,毫秒
|
||||
"""
|
||||
audio = AudioSegment.from_wav(pcm_path)
|
||||
wav_data = audio.raw_data
|
||||
rate = find_closest_sil_supports(audio.frame_rate)
|
||||
silk_data = pysilk.encode(
|
||||
wav_data, data_rate=audio.frame_rate, sample_rate=audio.frame_rate)
|
||||
wav_data, data_rate=rate, sample_rate=rate)
|
||||
with open(silk_path, "wb") as f:
|
||||
f.write(silk_data)
|
||||
return audio.duration_seconds * 1000
|
||||
@@ -53,14 +83,13 @@ def mp3_to_sil(mp3_path, silk_path):
|
||||
"""
|
||||
audio = AudioSegment.from_mp3(mp3_path)
|
||||
wav_data = audio.raw_data
|
||||
silk_data = pysilk.encode(
|
||||
wav_data, data_rate=audio.frame_rate, sample_rate=audio.frame_rate)
|
||||
rate = find_closest_sil_supports(audio.frame_rate)
|
||||
silk_data = pysilk.encode(wav_data, data_rate=rate, sample_rate=rate)
|
||||
# Save the silk file
|
||||
with open(silk_path, "wb") as f:
|
||||
f.write(silk_data)
|
||||
return audio.duration_seconds * 1000
|
||||
|
||||
|
||||
def sil_to_wav(silk_path, wav_path, rate: int = 24000):
|
||||
"""
|
||||
silk 文件转 wav
|
||||
|
||||
@@ -0,0 +1,68 @@
|
||||
|
||||
"""
|
||||
azure voice service
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import azure.cognitiveservices.speech as speechsdk
|
||||
from bridge.reply import Reply, ReplyType
|
||||
from common.log import logger
|
||||
from common.tmp_dir import TmpDir
|
||||
from voice.voice import Voice
|
||||
from config import conf
|
||||
"""
|
||||
Azure voice
|
||||
主目录设置文件中需填写azure_voice_api_key和azure_voice_region
|
||||
|
||||
查看可用的 voice: https://speech.microsoft.com/portal/voicegallery
|
||||
|
||||
"""
|
||||
|
||||
class AzureVoice(Voice):
    """
    Voice engine built on the Azure Speech SDK, offering speech recognition
    (``voiceToText``) and speech synthesis (``textToVoice``).
    """

    def __init__(self):
        """
        Load the local voice settings and build the SDK speech configuration.

        The settings file ``config.json`` lives next to this module; it is
        created with default values when it does not exist. The API key and
        region come from the global configuration (``azure_voice_api_key`` and
        ``azure_voice_region``).

        Any exception is logged and swallowed, so after a failed
        initialisation ``self.speech_config`` may be missing.
        """
        try:
            curdir = os.path.dirname(__file__)
            config_path = os.path.join(curdir, "config.json")
            config = None
            if not os.path.exists(config_path):  # no config file yet: create a local one with defaults
                config = { "speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural", "speech_recognition_language": "zh-CN"}
                with open(config_path, "w", encoding="utf-8") as fw:
                    json.dump(config, fw, indent=4)
            else:
                with open(config_path, "r", encoding="utf-8") as fr:
                    config = json.load(fr)
            self.api_key = conf().get('azure_voice_api_key')
            self.api_region = conf().get('azure_voice_region')
            self.speech_config = speechsdk.SpeechConfig(subscription=self.api_key, region=self.api_region)
            self.speech_config.speech_synthesis_voice_name = config["speech_synthesis_voice_name"]
            self.speech_config.speech_recognition_language = config["speech_recognition_language"]
        except Exception as e:
            # Logger.warn is a deprecated alias; use warning() with lazy formatting.
            logger.warning("AzureVoice init failed: %s, ignore ", e)

    def voiceToText(self, voice_file):
        """
        Recognise the speech in ``voice_file`` with a single ``recognize_once`` call.

        :param voice_file: path of the audio file to recognise
        :return: a TEXT ``Reply`` with the recognised text, or an ERROR ``Reply``
                 when the result reason is not ``RecognizedSpeech``
        """
        audio_config = speechsdk.AudioConfig(filename=voice_file)
        speech_recognizer = speechsdk.SpeechRecognizer(speech_config=self.speech_config, audio_config=audio_config)
        result = speech_recognizer.recognize_once()
        if result.reason == speechsdk.ResultReason.RecognizedSpeech:
            logger.info('[Azure] voiceToText voice file name={} text={}'.format(voice_file, result.text))
            reply = Reply(ReplyType.TEXT, result.text)
        else:
            logger.error('[Azure] voiceToText error, result={}'.format(result))
            reply = Reply(ReplyType.ERROR, "抱歉,语音识别失败")
        return reply

    def textToVoice(self, text):
        """
        Synthesise ``text`` into a wav file in the temporary directory.

        The file name embeds the current Unix time in whole seconds.

        :param text: text to synthesise
        :return: a VOICE ``Reply`` holding the wav file path, or an ERROR ``Reply``
                 when the result reason is not ``SynthesizingAudioCompleted``
        """
        fileName = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.wav'
        audio_config = speechsdk.AudioConfig(filename=fileName)
        speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.speech_config, audio_config=audio_config)
        result = speech_synthesizer.speak_text(text)
        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            logger.info(
                '[Azure] textToVoice text={} voice file name={}'.format(text, fileName))
            reply = Reply(ReplyType.VOICE, fileName)
        else:
            logger.error('[Azure] textToVoice error, result={}'.format(result))
            reply = Reply(ReplyType.ERROR, "抱歉,语音合成失败")
        return reply
|
||||
@@ -0,0 +1,4 @@
|
||||
{
|
||||
"speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural",
|
||||
"speech_recognition_language": "zh-CN"
|
||||
}
|
||||
@@ -25,12 +25,12 @@ class PyttsVoice(Voice):
|
||||
|
||||
def textToVoice(self, text):
|
||||
try:
|
||||
mp3File = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
|
||||
self.engine.save_to_file(text, mp3File)
|
||||
wavFile = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.wav'
|
||||
self.engine.save_to_file(text, wavFile)
|
||||
self.engine.runAndWait()
|
||||
logger.info(
|
||||
'[Pytts] textToVoice text={} voice file name={}'.format(text, mp3File))
|
||||
reply = Reply(ReplyType.VOICE, mp3File)
|
||||
'[Pytts] textToVoice text={} voice file name={}'.format(text, wavFile))
|
||||
reply = Reply(ReplyType.VOICE, wavFile)
|
||||
except Exception as e:
|
||||
reply = Reply(ReplyType.ERROR, str(e))
|
||||
finally:
|
||||
|
||||
@@ -20,4 +20,7 @@ def create_voice(voice_type):
|
||||
elif voice_type == 'pytts':
|
||||
from voice.pytts.pytts_voice import PyttsVoice
|
||||
return PyttsVoice()
|
||||
elif voice_type == 'azure':
|
||||
from voice.azure.azure_voice import AzureVoice
|
||||
return AzureVoice()
|
||||
raise RuntimeError
|
||||
|
||||
Reference in New Issue
Block a user