feat: add dalle3 gpt-4-turbo model change

2026-06-17 06:27:28 +08:00 · 2023-11-10 10:11:02 +08:00
parent db8e506de0
commit f001ac6903
16 changed files with 166 additions and 25 deletions
@@ -7,7 +7,7 @@
 - [x] **多端部署：** 有多种部署方式可选择且功能完备，目前已支持个人微信，微信公众号和企业微信应用等部署方式
 - [x] **基础对话：** 私聊及群聊的消息智能回复，支持多轮会话上下文记忆，支持 GPT-3.5, GPT-4, claude, 文心一言, 讯飞星火
 - [x] **语音识别：** 可识别语音消息，通过文字或语音回复，支持 azure, baidu, google, openai等多种语音模型
- [x] **图片生成：** 支持图片生成 和 图生图（如照片修复），可选择 Dell-E, stable diffusion, replicate, midjourney模型
+- [x] **图片生成：** 支持图片生成 和 图生图（如照片修复），可选择 Dall-E, stable diffusion, replicate, midjourney模型
 - [x] **丰富插件：** 支持个性化插件扩展，已实现多角色切换、文字冒险、敏感词过滤、聊天记录总结、文档总结和对话等插件
 - [X] **Tool工具：** 与操作系统和互联网交互，支持最新信息搜索、数学计算、天气和资讯查询、网页总结，基于 [chatgpt-tool-hub](https://github.com/goldfishh/chatgpt-tool-hub) 实现
 - [x] **知识库：** 通过上传知识库文件自定义专属机器人，可作为数字分身、领域知识库、智能客服使用，基于 [LinkAI](https://chat.link-ai.tech/console) 实现
@@ -62,10 +62,10 @@ def num_tokens_from_messages(messages, model):
    import tiktoken
-    if model in ["gpt-3.5-turbo-0301", "gpt-35-turbo"]:
+    if model in ["gpt-3.5-turbo-0301", "gpt-35-turbo", "gpt-3.5-turbo-1106"]:
        return num_tokens_from_messages(messages, model="gpt-3.5-turbo")
    elif model in ["gpt-4-0314", "gpt-4-0613", "gpt-4-32k", "gpt-4-32k-0613", "gpt-3.5-turbo-0613",
-                   "gpt-3.5-turbo-16k", "gpt-3.5-turbo-16k-0613", "gpt-35-turbo-16k", const.GPT4_PREVIEW, const.GPT4_VISION_PREVIEW]:
+                   "gpt-3.5-turbo-16k", "gpt-3.5-turbo-16k-0613", "gpt-35-turbo-16k", const.GPT4_TURBO_PREVIEW, const.GPT4_VISION_PREVIEW]:
        return num_tokens_from_messages(messages, model="gpt-4")
    try:
@@ -15,7 +15,7 @@ from common.log import logger
 from config import conf, pconf
-class LinkAIBot(Bot, OpenAIImage):
+class LinkAIBot(Bot):
    # authentication failed
    AUTH_FAILED_CODE = 401
    NO_QUOTA_CODE = 406
@@ -193,6 +193,32 @@ class LinkAIBot(Bot, OpenAIImage):
            return self.reply_text(session, app_code, retry_count + 1)
    def create_img(self, query, retry_count=0, api_key=None):
        """Generate an image from a text prompt via the LinkAI images endpoint.

        :param query: text prompt describing the image to generate
        :param retry_count: not used by this implementation
        :param api_key: not used by this implementation; the request is always
            authorized with the configured ``linkai_api_key``
        :return: ``(True, image_url)`` on success, otherwise ``(False, message)``
            where ``message`` is a user-facing error text
        """
        try:
            logger.info("[LinkImage] image_query={}".format(query))
            headers = {
                "Content-Type": "application/json",
                "Authorization": f"Bearer {conf().get('linkai_api_key')}"
            }
            data = {
                "prompt": query,
                "n": 1,
                "model": conf().get("text_to_image") or "dall-e-2",
                "response_format": "url",
                "img_proxy": conf().get("image_proxy")
            }
            url = conf().get("linkai_api_base", "https://api.link-ai.chat") + "/v1/images/generations"
            res = requests.post(url, headers=headers, json=data, timeout=(5, 90))
            # Any unexpected payload shape (missing "data", empty list, no "url")
            # raises here and is reported through the generic handler below.
            image_url = res.json()["data"][0]["url"]
            logger.info("[LinkImage] image_url={}".format(image_url))
            return True, image_url
        except Exception as e:
            logger.error(format(e))
            return False, "画图出现问题，请休息一下再问我吧"
    def _fetch_knowledge_search_suffix(self, response) -> str:
        try:
            if response.get("knowledge_base"):
@@ -24,7 +24,8 @@ class OpenAIImage(object):
                api_key=api_key,
                prompt=query,  # 图片描述
                n=1,  # 每次生成图片的数量
-                size=conf().get("image_create_size", "256x256"),  # 图片大小,可选有 256x256, 512x512, 1024x1024
+                model=conf().get("text_to_image") or "dall-e-2",
                # size=conf().get("image_create_size", "256x256"),  # 图片大小,可选有 256x256, 512x512, 1024x1024
            )
            image_url = response["data"][0]["url"]
            logger.info("[OPEN_AI] image_url={}".format(image_url))
@@ -36,7 +37,7 @@ class OpenAIImage(object):
                logger.warn("[OPEN_AI] ImgCreate RateLimit exceed, 第{}次重试".format(retry_count + 1))
                return self.create_img(query, retry_count + 1)
            else:
-                return False, "提问太快啦，请休息一下再问我吧"
+                return False, "画图出现问题，请休息一下再问我吧"
        except Exception as e:
            logger.exception(e)
-            return False, str(e)
+            return False, "画图出现问题，请休息一下再问我吧"
@@ -29,6 +29,10 @@ class Bridge(object):
            self.btype["chat"] = const.XUNFEI
        if conf().get("use_linkai") and conf().get("linkai_api_key"):
            self.btype["chat"] = const.LINKAI
            if not conf().get("voice_to_text") or conf().get("voice_to_text") in ["openai"]:
                self.btype["voice_to_text"] = const.LINKAI
            if not conf().get("text_to_voice") or conf().get("text_to_voice") in [const.TTS_1, const.TTS_1_HD]:
                self.btype["text_to_voice"] = const.LINKAI
        if model_type in ["claude"]:
            self.btype["chat"] = const.CLAUDEAI
        self.bots = {}
@@ -91,6 +91,7 @@ class ChatChannel(Channel):
        # 消息内容匹配过程，并处理content
        if ctype == ContextType.TEXT:
            if first_in and "」\n- - - - - - -" in content:  # 初次匹配 过滤引用消息
                logger.debug(content)
                logger.debug("[WX]reference query skipped")
                return None
@@ -7,9 +7,12 @@ CHATGPTONAZURE = "chatGPTOnAzure"
 LINKAI = "linkai"
 CLAUDEAI = "claude"
 MODEL_LIST = ["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4", "wenxin", "wenxin-4", "xunfei", "claude"]
 # model
 GPT4 = "gpt-4"
-GPT4_PREVIEW = "gpt-4-1106-preview"
+GPT4_TURBO_PREVIEW = "gpt-4-1106-preview"
 GPT4_VISION_PREVIEW = "gpt-4-vision-preview"
 WHISPER_1 = "whisper-1"
 TTS_1 = "tts-1"
 TTS_1_HD = "tts-1-hd"
 MODEL_LIST = ["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4", "wenxin", "wenxin-4", "xunfei", "claude", "gpt-4-turbo", GPT4_TURBO_PREVIEW]
@@ -1,7 +1,10 @@
 {
  "channel_type": "wx",
  "open_ai_api_key": "YOUR API KEY",
  "model": "gpt-3.5-turbo",
-  "channel_type": "wx",
+  "text_to_image": "dall-e-2",
  "voice_to_text": "openai",
  "text_to_voice": "openai",
  "proxy": "",
  "hot_reload": false,
  "single_chat_prefix": [
@@ -22,9 +25,10 @@
  "image_create_prefix": [
    "画"
  ],
-  "speech_recognition": false,
+  "speech_recognition": true,
  "group_speech_recognition": false,
  "voice_reply_voice": false,
  "tts_voice_id": "alloy",
  "conversation_max_tokens": 1000,
  "expires_in_seconds": 3600,
  "character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题，并且可以使用多种语言与人交流。",
@@ -34,9 +34,11 @@ available_setting = {
    "group_chat_in_one_session": ["ChatGPT测试群"],  # 支持会话上下文共享的群名称
    "group_welcome_msg": "",  # 配置新人进群固定欢迎语，不配置则使用随机风格欢迎 
    "trigger_by_self": False,  # 是否允许机器人触发
    "text_to_image": "dall-e-2",  # 图片生成模型，可选 dall-e-2, dall-e-3
    "image_proxy": True,  # 是否需要图片代理，国内访问LinkAI时需要
    "image_create_prefix": ["画", "看", "找"],  # 开启图片回复的前缀
    "concurrency_in_session": 1,  # 同一会话最多有多少条消息在处理中，大于1可能乱序
-    "image_create_size": "256x256",  # 图片大小,可选有 256x256, 512x512, 1024x1024
+    "image_create_size": "256x256",  # 图片大小,可选有 256x256, 512x512, 1024x1024 (dall-e-3默认为1024x1024)
    # chatgpt会话参数
    "expires_in_seconds": 3600,  # 无操作会话的过期时间
    # 人格描述
@@ -66,12 +68,13 @@ available_setting = {
    # wework的通用配置
    "wework_smart": True,  # 配置wework是否使用已登录的企业微信，False为多开
    # 语音设置
-    "speech_recognition": False,  # 是否开启语音识别
+    "speech_recognition": True,  # 是否开启语音识别
    "group_speech_recognition": False,  # 是否开启群组语音识别
    "voice_reply_voice": False,  # 是否使用语音回复语音，需要设置对应语音合成引擎的api key
    "always_reply_voice": False,  # 是否一直使用语音回复
    "voice_to_text": "openai",  # 语音识别引擎，支持openai,baidu,google,azure
-    "text_to_voice": "baidu",  # 语音合成引擎，支持baidu,google,pytts(offline),azure,elevenlabs
+    "text_to_voice": "tts-1",  # 语音合成引擎，支持tts-1,tts-1-hd,baidu,google,pytts(offline),azure,elevenlabs
    "tts_voice_id": "alloy",
    # baidu 语音api配置， 使用百度语音识别和语音合成时需要
    "baidu_app_id": "",
    "baidu_api_key": "",
@@ -271,7 +271,7 @@ class Godcmd(Plugin):
                        if args[0] not in const.MODEL_LIST:
                            ok, result = False, "模型名称不存在"
                        else:
-                            conf()["model"] = args[0]
+                            conf()["model"] = self.model_mapping(args[0])
                            Bridge().reset_bot()
                            ok, result = True, "模型设置为: " + str(conf().get("model"))
                elif cmd == "id":
@@ -467,3 +467,9 @@ class Godcmd(Plugin):
        if context["isgroup"]:
            return context.kwargs.get("msg").actual_user_id in global_config["admin_users"]
        return False
    def model_mapping(self, model) -> str:
        """Translate a user-facing model alias into the model name stored in config.

        Only the ``"gpt-4-turbo"`` alias is rewritten (to
        ``const.GPT4_TURBO_PREVIEW``); every other value is returned unchanged.
        """
        return const.GPT4_TURBO_PREVIEW if model == "gpt-4-turbo" else model
@@ -14,6 +14,7 @@
    "summary": {
        "enabled": true,
        "group_enabled": true,
-        "max_file_size": 5000
+        "max_file_size": 5000,
        "type": ["FILE", "SHARING", "IMAGE"]
    }
 }
@@ -46,19 +46,23 @@ class LinkAI(Plugin):
            # filter content no need solve
            return
-        if context.type == ContextType.FILE and self._is_summary_open(context):
+        if context.type in [ContextType.FILE, ContextType.IMAGE] and self._is_summary_open(context):
            # 文件处理
            context.get("msg").prepare()
            file_path = context.content
            if not LinkSummary().check_file(file_path, self.sum_config):
                return
-            _send_info(e_context, "正在为你加速生成摘要，请稍后")
+            if context.type != ContextType.IMAGE:
                _send_info(e_context, "正在为你加速生成摘要，请稍后")
            res = LinkSummary().summary_file(file_path)
            if not res:
-                _set_reply_text("因为神秘力量无法获取文章内容，请稍后再试吧", e_context, level=ReplyType.TEXT)
+                _set_reply_text("因为神秘力量无法获取内容，请稍后再试吧", e_context, level=ReplyType.TEXT)
                return
-            USER_FILE_MAP[_find_user_id(context) + "-sum_id"] = res.get("summary_id")
+            summary_text = res.get("summary")
-            _set_reply_text(res.get("summary") + "\n\n💬 发送 \"开启对话\" 可以开启与文件内容的对话", e_context, level=ReplyType.TEXT)
+            if context.type != ContextType.IMAGE:
                USER_FILE_MAP[_find_user_id(context) + "-sum_id"] = res.get("summary_id")
                summary_text += "\n\n💬 发送 \"开启对话\" 可以开启与文件内容的对话"
            _set_reply_text(summary_text, e_context, level=ReplyType.TEXT)
            os.remove(file_path)
            return
@@ -187,6 +191,11 @@ class LinkAI(Plugin):
            return False
        if context.kwargs.get("isgroup") and not self.sum_config.get("group_enabled"):
            return False
        support_type = self.sum_config.get("type")
        if not support_type:
            return True
        if context.type.name not in support_type:
            return False
        return True
    # LinkAI 对话任务处理
@@ -13,7 +13,8 @@ class LinkSummary:
            "file": open(file_path, "rb"),
            "name": file_path.split("/")[-1],
        }
-        res = requests.post(url=self.base_url() + "/v1/summary/file", headers=self.headers(), files=file_body, timeout=(5, 300))
+        url = self.base_url() + "/v1/summary/file"
        res = requests.post(url, headers=self.headers(), files=file_body, timeout=(5, 300))
        return self._parse_summary_res(res)
    def summary_url(self, url: str):
@@ -71,7 +72,7 @@ class LinkSummary:
            return False
        suffix = file_path.split(".")[-1]
-        support_list = ["txt", "csv", "docx", "pdf", "md"]
+        support_list = ["txt", "csv", "docx", "pdf", "md", "jpg", "jpeg", "png"]
        if suffix not in support_list:
            logger.warn(f"[LinkSum] unsupported file, suffix={suffix}, support_list={support_list}")
            return False
@@ -33,4 +33,8 @@ def create_voice(voice_type):
        from voice.elevent.elevent_voice import ElevenLabsVoice
        return ElevenLabsVoice()
    elif voice_type == "linkai":
        from voice.linkai.linkai_voice import LinkAIVoice
        return LinkAIVoice()
    raise RuntimeError
@@ -0,0 +1,78 @@
 """
 LinkAI voice service
 """
 import json
 import os
 import random
 import openai
 import requests
 from bridge.reply import Reply, ReplyType
 from common.log import logger
 from config import conf
 from voice.voice import Voice
 from common import const
 import datetime
 class LinkAIVoice(Voice):
    def __init__(self):
        pass
    def voiceToText(self, voice_file):
        logger.debug("[LinkVoice] voice file name={}".format(voice_file))
        try:
            url = conf().get("linkai_api_base", "https://api.link-ai.chat") + "/v1/audio/transcriptions"
            headers = {"Authorization": "Bearer " + conf().get("linkai_api_key")}
            model = None
            if not conf().get("text_to_voice") or conf().get("voice_to_text") == "openai":
                model = const.WHISPER_1
            file = open(voice_file, "rb")
            file_body = {
                "file": file
            }
            data = {
                "model": model
            }
            res = requests.post(url, files=file_body, headers=headers, data=data, timeout=(5, 60))
            if res.status_code == 200:
                text = res.json().get("text")
            else:
                res_json = res.json()
                logger.error(f"[LinkVoice] voiceToText error, status_code={res.status_code}, msg={res_json.get('message')}")
                return None
            reply = Reply(ReplyType.TEXT, text)
            logger.info(f"[LinkVoice] voiceToText success, text={text}, file name={voice_file}")
        except Exception as e:
            logger.error(e)
            reply = Reply(ReplyType.ERROR, "我暂时还无法听清您的语音，请稍后再试吧~")
        return reply
    def textToVoice(self, text):
        try:
            url = conf().get("linkai_api_base", "https://api.link-ai.chat") + "/v1/audio/speech"
            headers = {"Authorization": "Bearer " + conf().get("linkai_api_key")}
            model = const.TTS_1
            if not conf().get("text_to_voice") or conf().get("text_to_voice") in [const.TTS_1, const.TTS_1_HD]:
                model = conf().get("text_to_voice") or const.TTS_1
            data = {
                "model": model,
                "input": text,
                "voice": conf().get("tts_voice_id")
            }
            res = requests.post(url, headers=headers, json=data, timeout=(5, 120))
            if res.status_code == 200:
                tmp_file_name = "tmp/" + datetime.datetime.now().strftime('%Y%m%d%H%M%S') + str(random.randint(0, 1000)) + ".mp3"
                with open(tmp_file_name, 'wb') as f:
                    f.write(res.content)
                reply = Reply(ReplyType.VOICE, tmp_file_name)
                logger.info(f"[LinkVoice] textToVoice success, input={text}, model={model}, voice_id={data.get('voice')}")
                return reply
            else:
                res_json = res.json()
                logger.error(f"[LinkVoice] textToVoice error, status_code={res.status_code}, msg={res_json.get('message')}")
                return None
        except Exception as e:
            logger.error(e)
            reply = Reply(ReplyType.ERROR, "遇到了一点小问题，请稍后再问我吧")
        return reply
@@ -24,6 +24,6 @@ class OpenaiVoice(Voice):
            reply = Reply(ReplyType.TEXT, text)
            logger.info("[Openai] voiceToText text={} voice file name={}".format(text, voice_file))
        except Exception as e:
-            reply = Reply(ReplyType.ERROR, str(e))
+            reply = Reply(ReplyType.ERROR, "我暂时还无法听清您的语音，请稍后再试吧~")
        finally:
            return reply