add Xunfei Voice

Signed-off-by: njnuko <njnuko@163.com>
2026-03-18 04:25:14 +08:00 · 2024-05-20 15:04:23 +08:00
parent 99aac76618
commit 6fed719e09
5 changed files with 460 additions and 0 deletions
--- a/voice/factory.py
+++ b/voice/factory.py
@@ -46,4 +46,8 @@ def create_voice(voice_type):
        from voice.edge.edge_voice import EdgeVoice

        return EdgeVoice()
+    elif voice_type == "xunfei":
+        from voice.xunfei.xunfei_voice import XunfeiVoice
+
+        return XunfeiVoice()
    raise RuntimeError
--- a/voice/xunfei/config.json.template
+++ b/voice/xunfei/config.json.template
@@ -0,0 +1,7 @@
+{
+  "APPID":"xxx71xxx",  #讯飞xfyun.cn控制台中应用的ID
+  "APIKey":"xxxx69058exxxxxx",  #讯飞xfyun.cn控制台语音合成或者听写界面的APIKey
+  "APISecret":"xxxx697f0xxxxxx",  #讯飞xfyun.cn控制台语音合成或者听写界面的APISecret
+  "BusinessArgsTTS":{"aue": "lame", "sfl": 1, "auf": "audio/L16;rate=16000", "vcn": "xiaoyan", "tte": "utf8"}, #语音合成的参数，具体可以参考xfyun.cn的文档
+  "BusinessArgsASR":{"domain": "iat", "language": "zh_cn", "accent": "mandarin", "vad_eos":10000, "dwa": "wpgs"}  #语音听写的参数，具体可以参考xfyun.cn的文档
+}
--- a/voice/xunfei/xunfei_asr.py
+++ b/voice/xunfei/xunfei_asr.py
@@ -0,0 +1,209 @@
+# -*- coding:utf-8 -*-
+#
+#  Author: njnuko 
+#  Email: njnuko@163.com 
+#
+#  这个文档是基于官方的demo来改的，具体官方demo文档请参考官网
+#
+#  语音听写流式 WebAPI 接口调用示例 接口文档（必看）：https://doc.xfyun.cn/rest_api/语音听写（流式版）.html
+#  webapi 听写服务参考帖子（必看）：http://bbs.xfyun.cn/forum.php?mod=viewthread&tid=38947&extra=
+#  语音听写流式WebAPI 服务，热词使用方式：登陆开放平台https://www.xfyun.cn/后，找到控制台--我的应用---语音听写（流式）---服务管理--个性化热词，
+#  设置热词
+#  注意：热词只能在识别的时候会增加热词的识别权重，需要注意的是增加相应词条的识别率，但并不是绝对的，具体效果以您测试为准。
+#  语音听写流式WebAPI 服务，方言试用方法：登陆开放平台https://www.xfyun.cn/后，找到控制台--我的应用---语音听写（流式）---服务管理--识别语种列表
+#  可添加语种或方言，添加后会显示该方言的参数值
+#  错误码链接：https://www.xfyun.cn/document/error-code （code返回错误码时必看）
+# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+
+import websocket
+import datetime
+import hashlib
+import base64
+import hmac
+import json
+from urllib.parse import urlencode
+import time
+import ssl
+from wsgiref.handlers import format_date_time
+from datetime import datetime
+from time import mktime
+import _thread as thread
+import os
+import wave
+
+
+STATUS_FIRST_FRAME = 0  # marker for the first audio frame
+STATUS_CONTINUE_FRAME = 1  # marker for an intermediate audio frame
+STATUS_LAST_FRAME = 2  # marker for the last audio frame
+
+#############
+# whole_dict stores the returned results. Because results can be corrected
+# afterwards, a dict is used: when an update arrives the earlier values are
+# popped, and everything is merged at the end.
+# NOTE: a `global` statement at module level has no effect; the name is
+# actually created when xunfei_asr() assigns it.
+global whole_dict
+# This file is adapted from the official demo; wsParam is shared between
+# functions (xunfei_asr() assigns it and on_open() reads it).
+global wsParam
+##############
+
+
+class Ws_Param(object):
+    """Credentials and request arguments for one streaming dictation session.
+
+    The attributes mirror the constructor arguments; ``CommonArgs`` is the
+    ``{"app_id": ...}`` dict that is sent with the first audio frame.
+    """
+
+    def __init__(self, APPID, APIKey, APISecret,BusinessArgs, AudioFile):
+        """Store the credentials, the business arguments and the audio path."""
+        self.APPID, self.APIKey, self.APISecret = APPID, APIKey, APISecret
+        self.AudioFile = AudioFile
+        # Business arguments are supplied by the caller instead of being
+        # hard-coded here.
+        self.BusinessArgs = BusinessArgs
+        # Common arguments.
+        self.CommonArgs = {"app_id": APPID}
+
+    def create_url(self):
+        """Return the wss:// URL with the HMAC-SHA256 signed auth query string."""
+        host = "ws-api.xfyun.cn"
+        # RFC 1123 timestamp of the current time; it is part of the signed text.
+        date = format_date_time(mktime(datetime.now().timetuple()))
+
+        # Text to sign: host line, date line and the request line.
+        signature_origin = "\n".join(
+            ["host: " + host, "date: " + date, "GET /v2/iat HTTP/1.1"]
+        )
+        digest = hmac.new(
+            self.APISecret.encode('utf-8'),
+            signature_origin.encode('utf-8'),
+            digestmod=hashlib.sha256,
+        ).digest()
+        signature_sha = base64.b64encode(digest).decode(encoding='utf-8')
+
+        authorization_origin = (
+            'api_key="%s", algorithm="%s", headers="%s", signature="%s"'
+            % (self.APIKey, "hmac-sha256", "host date request-line", signature_sha)
+        )
+        authorization = base64.b64encode(
+            authorization_origin.encode('utf-8')
+        ).decode(encoding='utf-8')
+
+        # Auth parameters, url-encoded in the order authorization, date, host.
+        query = urlencode({
+            "authorization": authorization,
+            "date": date,
+            "host": host,
+        })
+        return 'wss://ws-api.xfyun.cn/v2/iat' + '?' + query
+
+
+# 收到websocket消息的处理
+def on_message(ws, message):
+    """Handle one server message and record the recognised text.
+
+    The JSON payload is parsed once. On a non-zero ``code`` the error is
+    printed and nothing is stored. Otherwise the words of this result are
+    concatenated and stored in the module-level ``whole_dict`` under the
+    result's own sentence number ``sn``. When the result supersedes earlier
+    ones (``rg`` present and ``pgs`` not "apd"), the entries in the inclusive
+    range ``rg[0]..rg[1]`` are removed first.
+
+    Args:
+        ws: the WebSocketApp that received the message (unused).
+        message: raw JSON text sent by the server.
+    """
+    global whole_dict
+    try:
+        payload = json.loads(message)
+        code = payload["code"]
+        sid = payload["sid"]
+        if code != 0:
+            errMsg = payload["message"]
+            print("sid:%s call error:%s code is:%s" % (sid, errMsg, code))
+            return
+
+        result = payload["data"]["result"]
+        data = result["ws"]
+        sn = result["sn"]
+        # The loop variable must not be named ``sn``: reusing that name made
+        # the new text land on key rg[1] instead of its own sentence number,
+        # so a later correction could no longer remove it and the final
+        # transcript contained duplicated text.
+        if "rg" in result and result.get("pgs", "rpl") != "apd":
+            rep_start, rep_end = result["rg"][0], result["rg"][1]
+            for replaced_sn in range(rep_start, rep_end + 1):
+                whole_dict.pop(replaced_sn, None)
+        whole_dict[sn] = "".join(w["w"] for i in data for w in i["cw"])
+        print("sid:%s call success!,data is:%s" % (sid, json.dumps(data, ensure_ascii=False)))
+    except Exception as e:
+        # Best effort: a malformed message is reported and skipped.
+        print("receive msg,but parse exception:", e)
+
+
+
+# 收到websocket错误的处理
+def on_error(ws, error):
+    """Print the error reported for the websocket connection."""
+    print("### error:", error)
+
+
+# 收到websocket关闭的处理
+def on_close(ws, a=None, b=None):
+    """Print a notice when the websocket is closed.
+
+    ``a`` and ``b`` take the two extra positional arguments (close status
+    code and close message) that websocket-client passes to this callback.
+    They default to None so the handler also works when it is invoked with
+    ``ws`` only.
+    """
+    print("### closed ###")
+
+
+# 收到websocket连接建立的处理
+def on_open(ws):
+    """Start a background thread that streams the audio file to the server.
+
+    The file named by the module-level ``wsParam.AudioFile`` is read with the
+    ``wave`` module in chunks of 8000 frames, base64-encoded and sent as JSON
+    messages 0.04 s apart. The first message also carries the common and
+    business arguments; an empty read marks the end of the audio and is sent
+    as the last message. The socket is closed when streaming ends, including
+    when reading or sending fails, so that ``run_forever`` can return.
+
+    Args:
+        ws: the connected WebSocketApp.
+    """
+    global wsParam
+
+    def run(*args):
+        frameSize = 8000  # number of audio frames read per message
+        intervel = 0.04  # pause between messages, in seconds
+        status = STATUS_FIRST_FRAME  # first / intermediate / last frame marker
+
+        try:
+            with wave.open(wsParam.AudioFile, "rb") as fp:
+                while True:
+                    buf = fp.readframes(frameSize)
+                    # End of file: whatever is sent now is the last frame.
+                    if not buf:
+                        status = STATUS_LAST_FRAME
+                    d = {"data": {"status": status, "format": "audio/L16;rate=16000",
+                                  "audio": str(base64.b64encode(buf), 'utf-8'),
+                                  "encoding": "raw"}}
+                    if status == STATUS_FIRST_FRAME:
+                        # Only the first frame carries app_id and the
+                        # business arguments.
+                        d = {"common": wsParam.CommonArgs,
+                             "business": wsParam.BusinessArgs,
+                             "data": d["data"]}
+                    ws.send(json.dumps(d))
+                    if status == STATUS_LAST_FRAME:
+                        # Presumably leaves time for the final results to
+                        # arrive before the socket is closed.
+                        time.sleep(1)
+                        break
+                    status = STATUS_CONTINUE_FRAME
+                    # Simulate the audio sampling interval.
+                    time.sleep(intervel)
+        finally:
+            # Always close the socket: otherwise a failure in this thread
+            # (e.g. an unreadable audio file) leaves run_forever blocked
+            # until the server drops the connection.
+            ws.close()
+
+    thread.start_new_thread(run, ())
+
+#提供给xunfei_voice调用的函数
+def xunfei_asr(APPID,APISecret,APIKey,BusinessArgsASR,AudioFile):
+    """Recognise the speech in an audio file and return the text.
+
+    Entry point used by xunfei_voice. It resets the module-level result
+    dict, opens the websocket, blocks until the connection is closed, and
+    then joins the stored partial results in ascending key order.
+
+    Args:
+        APPID: application id from the xfyun.cn console.
+        APISecret: API secret used to sign the request.
+        APIKey: API key sent in the authorization parameter.
+        BusinessArgsASR: dict of business arguments sent with the first frame.
+        AudioFile: path of the audio file; it is opened with ``wave``.
+
+    Returns:
+        The concatenated recognised text ("" when nothing was recognised).
+    """
+    global whole_dict
+    global wsParam
+    whole_dict = {}
+    # wsParam is a module-level global because on_open reads it.
+    wsParam = Ws_Param(APPID=APPID, APISecret=APISecret,
+                       APIKey=APIKey,BusinessArgs=BusinessArgsASR,
+                       AudioFile=AudioFile)
+    # Tracing dumps the handshake (whose URL contains the signed
+    # authorization) and every audio frame, so it is kept off.
+    websocket.enableTrace(False)
+    wsUrl = wsParam.create_url()
+    ws = websocket.WebSocketApp(wsUrl, on_message=on_message, on_error=on_error, on_close=on_close)
+    ws.on_open = on_open
+    # NOTE(review): certificate verification is disabled here, as in the
+    # official demo; consider enabling it.
+    ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
+    # Merge the partial results into the final transcript.
+    return "".join(whole_dict[i] for i in sorted(whole_dict))
+
+     
--- a/voice/xunfei/xunfei_tts.py
+++ b/voice/xunfei/xunfei_tts.py
@@ -0,0 +1,163 @@
+# -*- coding:utf-8 -*-
+#
+#  Author: njnuko
+#  Email: njnuko@163.com
+#
+#  这个文档是基于官方的demo来改的，具体官方demo文档请参考官网
+#
+#  语音听写流式 WebAPI 接口调用示例 接口文档（必看）：https://doc.xfyun.cn/rest_api/语音听写（流式版）.html
+#  webapi 听写服务参考帖子（必看）：http://bbs.xfyun.cn/forum.php?mod=viewthread&tid=38947&extra=
+#  语音听写流式WebAPI 服务，热词使用方式：登陆开放平台https://www.xfyun.cn/后，找到控制台--我的应用---语音听写（流式）---服务管理--个性化热词，
+#  设置热词
+#  注意：热词只能在识别的时候会增加热词的识别权重，需要注意的是增加相应词条的识别率，但并不是绝对的，具体效果以您测试为准。
+#  语音听写流式WebAPI 服务，方言试用方法：登陆开放平台https://www.xfyun.cn/后，找到控制台--我的应用---语音听写（流式）---服务管理--识别语种列表
+#  可添加语种或方言，添加后会显示该方言的参数值
+#  错误码链接：https://www.xfyun.cn/document/error-code （code返回错误码时必看）
+# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+import websocket
+import datetime
+import hashlib
+import base64
+import hmac
+import json
+from urllib.parse import urlencode
+import time
+import ssl
+from wsgiref.handlers import format_date_time
+from datetime import datetime
+from time import mktime
+import _thread as thread
+import os
+
+
+
+STATUS_FIRST_FRAME = 0  # marker for the first frame
+STATUS_CONTINUE_FRAME = 1  # marker for an intermediate frame
+STATUS_LAST_FRAME = 2  # marker for the last frame
+
+#############
+# outfile is the path of the output audio file.
+# NOTE: a `global` statement at module level has no effect; the name is
+# actually created when xunfei_tts() assigns it.
+global outfile
+# This file is adapted from the official demo; wsParam is shared between
+# functions (xunfei_tts() assigns it and on_open() reads it).
+global wsParam
+##############
+
+
+class Ws_Param(object):
+    """Credentials, request arguments and text for one speech synthesis request.
+
+    ``CommonArgs`` is the ``{"app_id": ...}`` dict and ``Data`` holds the
+    base64-encoded UTF-8 text together with ``status`` 2.
+    """
+
+    def __init__(self, APPID, APIKey, APISecret,BusinessArgs,Text):
+        """Store the credentials and arguments and pre-encode the text."""
+        self.APPID, self.APIKey, self.APISecret = APPID, APIKey, APISecret
+        # Business arguments are supplied by the caller instead of being
+        # hard-coded here.
+        self.BusinessArgs = BusinessArgs
+        self.Text = Text
+
+        # Common arguments.
+        self.CommonArgs = {"app_id": APPID}
+        # The text is sent base64-encoded. For minority languages the demo
+        # says to encode it as UTF-16LE instead of UTF-8.
+        encoded_text = base64.b64encode(Text.encode('utf-8')).decode("UTF8")
+        self.Data = {"status": 2, "text": encoded_text}
+
+    def create_url(self):
+        """Return the wss:// URL with the HMAC-SHA256 signed auth query string."""
+        # The signature and the host parameter use "ws-api.xfyun.cn" even
+        # though the URL itself points at tts-api.xfyun.cn.
+        host = "ws-api.xfyun.cn"
+        # RFC 1123 timestamp of the current time; it is part of the signed text.
+        date = format_date_time(mktime(datetime.now().timetuple()))
+
+        # Text to sign: host line, date line and the request line.
+        signature_origin = "\n".join(
+            ["host: " + host, "date: " + date, "GET /v2/tts HTTP/1.1"]
+        )
+        digest = hmac.new(
+            self.APISecret.encode('utf-8'),
+            signature_origin.encode('utf-8'),
+            digestmod=hashlib.sha256,
+        ).digest()
+        signature_sha = base64.b64encode(digest).decode(encoding='utf-8')
+
+        authorization_origin = (
+            'api_key="%s", algorithm="%s", headers="%s", signature="%s"'
+            % (self.APIKey, "hmac-sha256", "host date request-line", signature_sha)
+        )
+        authorization = base64.b64encode(
+            authorization_origin.encode('utf-8')
+        ).decode(encoding='utf-8')
+
+        # Auth parameters, url-encoded in the order authorization, date, host.
+        query = urlencode({
+            "authorization": authorization,
+            "date": date,
+            "host": host,
+        })
+        return 'wss://tts-api.xfyun.cn/v2/tts' + '?' + query
+
+def on_message(ws, message):
+    """Handle one synthesis message and append its audio to the output file.
+
+    The response code is checked before the ``data`` field is touched, so
+    an error response that carries no audio is reported with the server's
+    own message instead of surfacing as a parse exception. On success the
+    base64 audio is decoded and appended to the module-level ``outfile``;
+    when ``status`` is 2 the socket is closed after the chunk is written.
+
+    Args:
+        ws: the WebSocketApp that received the message.
+        message: raw JSON text sent by the server.
+    """
+    global outfile
+    try:
+        message = json.loads(message)
+        code = message["code"]
+        sid = message["sid"]
+        if code != 0:
+            errMsg = message["message"]
+            print("sid:%s call error:%s code is:%s" % (sid, errMsg, code))
+            # Nothing more will be synthesised; let run_forever return.
+            ws.close()
+            return
+
+        data = message["data"]
+        audio = base64.b64decode(data["audio"])
+        with open(outfile, 'ab') as f:
+            f.write(audio)
+        if data["status"] == 2:
+            print("ws is closed")
+            ws.close()
+    except Exception as e:
+        # Best effort: a malformed message is reported and skipped.
+        print("receive msg,but parse exception:", e)
+
+
+
+# 收到websocket连接建立的处理
+def on_open(ws):
+    """Send the synthesis request from a background thread once connected.
+
+    Any existing file at the module-level ``outfile`` path is removed before
+    the request is sent. on_message appends audio to that file as soon as
+    the first response arrives, so deleting it after sending could throw
+    away the first audio chunk.
+
+    Args:
+        ws: the connected WebSocketApp.
+    """
+    global outfile
+    global wsParam
+
+    def run(*args):
+        # Clear a stale output file first; see the docstring for why the
+        # order matters.
+        if os.path.exists(outfile):
+            os.remove(outfile)
+        d = json.dumps({"common": wsParam.CommonArgs,
+                        "business": wsParam.BusinessArgs,
+                        "data": wsParam.Data,
+                        })
+        print("------>开始发送文本数据")
+        ws.send(d)
+
+    thread.start_new_thread(run, ())
+
+# 收到websocket错误的处理
+def on_error(ws, error):
+    """Print the error reported for the websocket connection."""
+    print("### error:", error)
+
+
+
+# 收到websocket关闭的处理
+def on_close(ws, close_status_code=None, close_msg=None):
+    """Print a notice when the websocket is closed.
+
+    websocket-client passes the close status code and close message as
+    extra positional arguments; a handler that accepts only ``ws`` fails
+    with a TypeError when it is invoked that way. Both extras default to
+    None so a call with ``ws`` alone keeps working.
+    """
+    print("### closed ###")
+
+
+
+def xunfei_tts(APPID, APIKey, APISecret,BusinessArgsTTS, Text, OutFile):
+    """Synthesise ``Text`` over the websocket API and return the output path.
+
+    The output path and the request parameters are published through the
+    module-level ``outfile`` and ``wsParam`` globals, which the websocket
+    callbacks read. The call blocks until the connection is closed.
+
+    Args:
+        APPID: application id from the xfyun.cn console.
+        APIKey: API key sent in the authorization parameter.
+        APISecret: API secret used to sign the request.
+        BusinessArgsTTS: dict of business arguments for the request.
+        Text: text to synthesise.
+        OutFile: path the audio is written to.
+
+    Returns:
+        ``OutFile``, unchanged.
+    """
+    global outfile
+    global wsParam
+    outfile = OutFile
+    wsParam = Ws_Param(APPID,APIKey,APISecret,BusinessArgsTTS,Text)
+    websocket.enableTrace(False)
+    app = websocket.WebSocketApp(
+        wsParam.create_url(),
+        on_message=on_message,
+        on_error=on_error,
+        on_close=on_close,
+    )
+    app.on_open = on_open
+    app.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
+    return outfile
+     
--- a/voice/xunfei/xunfei_voice.py
+++ b/voice/xunfei/xunfei_voice.py
@@ -0,0 +1,77 @@
+#####################################################################
+#    xunfei voice service
+#     Auth: njnuko
+#     Email: njnuko@163.com
+#
+#    要使用本模块, 首先到 xfyun.cn 注册一个开发者账号,
+#    之后创建一个新应用, 然后在应用管理的语音识别或者语音合成右边可以查看APPID API Key 和 Secret Key
+#    然后在 config.json 中填入这三个值
+#####################################################################
+
+import json
+import os
+import time
+
+from bridge.reply import Reply, ReplyType
+from common.log import logger
+from common.tmp_dir import TmpDir
+from config import conf
+from voice.voice import Voice
+from .xunfei_asr import xunfei_asr
+from .xunfei_tts import xunfei_tts
+from voice.audio_convert import any_to_mp3
+import shutil
+from pydub import AudioSegment
+
+
+class XunfeiVoice(Voice):
+    """Voice backend that uses the Xunfei dictation and synthesis helpers."""
+
+    def __init__(self):
+        """Load credentials and request arguments from config.json.
+
+        The file is looked up next to this module. A failure to read or
+        parse it is logged and ignored, which leaves the attributes as None.
+        """
+        self.APPID = None
+        self.APIKey = None
+        self.APISecret = None
+        self.BusinessArgsTTS = None
+        self.BusinessArgsASR = None
+        try:
+            curdir = os.path.dirname(__file__)
+            config_path = os.path.join(curdir, "config.json")
+            # Named xunfei_conf so the imported ``conf`` is not shadowed.
+            with open(config_path, "r", encoding="utf-8") as fr:
+                xunfei_conf = json.load(fr)
+            # The parsed config holds the API key and secret, so it is
+            # deliberately not printed.
+            self.APPID = str(xunfei_conf.get("APPID"))
+            self.APIKey = str(xunfei_conf.get("APIKey"))
+            self.APISecret = str(xunfei_conf.get("APISecret"))
+            self.BusinessArgsTTS = xunfei_conf.get("BusinessArgsTTS")
+            self.BusinessArgsASR = xunfei_conf.get("BusinessArgsASR")
+        except Exception as e:
+            logger.warning("XunfeiVoice init failed: %s, ignore " % e)
+
+    def voiceToText(self, voice_file):
+        """Recognise a local audio file.
+
+        Args:
+            voice_file: path of the audio file handed to xunfei_asr.
+
+        Returns:
+            A TEXT Reply holding the recognised text, or an ERROR Reply that
+            includes the exception when recognition fails.
+        """
+        try:
+            logger.debug("[Xunfei] voice file name={}".format(voice_file))
+            text = xunfei_asr(self.APPID,self.APISecret,self.APIKey,self.BusinessArgsASR,voice_file)
+            logger.info("讯飞语音识别到了: {}".format(text))
+            reply = Reply(ReplyType.TEXT, text)
+        except Exception as e:
+            logger.warning("[Xunfei] voiceToText error={}".format(e))
+            reply = Reply(ReplyType.ERROR, "讯飞语音识别出错了；{0}".format(e))
+        return reply
+
+    def textToVoice(self, text):
+        """Synthesise ``text`` into an mp3 file in the temporary directory.
+
+        Args:
+            text: text to synthesise.
+
+        Returns:
+            A VOICE Reply holding the file path, or an ERROR Reply when
+            synthesis fails or no audio file was produced.
+        """
+        try:
+            # Avoid the same filename under multithreading
+            fileName = TmpDir().path() + "reply-" + str(int(time.time())) + "-" + str(hash(text) & 0x7FFFFFFF) + ".mp3"
+            return_file = xunfei_tts(self.APPID,self.APIKey,self.APISecret,self.BusinessArgsTTS,text,fileName)
+            # xunfei_tts reports failures only by printing them, so a
+            # missing file is the only sign that synthesis did not work.
+            if not os.path.isfile(return_file):
+                raise RuntimeError("no audio file was produced: {}".format(return_file))
+            logger.info("[Xunfei] textToVoice text={} voice file name={}".format(text, fileName))
+            reply = Reply(ReplyType.VOICE, fileName)
+        except Exception as e:
+            # Log the exception itself; fileName may not even be bound here.
+            logger.error("[Xunfei] textToVoice error={}".format(e))
+            reply = Reply(ReplyType.ERROR, "抱歉，讯飞语音合成失败")
+        return reply