Merge branch 'zwssunny-master' into master

2026-02-12 18:29:21 +08:00 · 2023-03-28 03:16:31 +08:00
parent 48e066b677 2a21941b68
commit 3e2c68ba49
12 changed files with 291 additions and 181 deletions
--- a/app.py
+++ b/app.py
@@ -1,6 +1,6 @@
 # encoding:utf-8

-import config
+from config import conf, load_config
 from channel import channel_factory
 from common.log import logger

@@ -9,10 +9,10 @@ from plugins import *
 def run():
    try:
        # load config
-        config.load_config()
+        load_config()

        # create channel
-        channel_name='wx'
+        channel_name=conf().get('channel_type', 'wx')
        channel = channel_factory.create_channel(channel_name)
        if channel_name=='wx':
            PluginManager().load_plugins()
--- a/bot/bot_factory.py
+++ b/bot/bot_factory.py
@@ -6,9 +6,9 @@ from common import const

 def create_bot(bot_type):
    """
-    create a channel instance
-    :param channel_type: channel type code
-    :return: channel instance
+    create a bot instance
+    :param bot_type: bot type code
+    :return: bot instance
    """
    if bot_type == const.BAIDU:
        # Baidu Unit对话接口
--- a/channel/wechat/wechat_channel.py
+++ b/channel/wechat/wechat_channel.py
@@ -5,6 +5,9 @@ wechat channel
 """

 import os
+import requests
+import io
+import time
 from lib import itchat
 import json
 from lib.itchat.content import *
@@ -17,17 +20,18 @@ from common.tmp_dir import TmpDir
 from config import conf
 from common.time_check import time_checker
 from plugins import *
-import requests
-import io
-import time
+from voice.audio_convert import mp3_to_wav


 thread_pool = ThreadPoolExecutor(max_workers=8)
+
+
 def thread_pool_callback(worker):
    worker_exception = worker.exception()
    if worker_exception:
        logger.exception("Worker return exception: {}".format(worker_exception))

+
@itchat.msg_register(TEXT)
 def handler_single_msg(msg):
    WechatChannel().handle_text(msg)
@@ -48,6 +52,8 @@ def handler_group_voice(msg):
    WechatChannel().handle_group_voice(msg)
    return None

+
+
 class WechatChannel(Channel):
    def __init__(self):
        self.userName = None
@@ -55,7 +61,7 @@ class WechatChannel(Channel):

    def startup(self):

-        itchat.instance.receivingRetryCount = 600 # 修改断线超时时间
+        itchat.instance.receivingRetryCount = 600  # 修改断线超时时间
        # login by scan QRCode
        hotReload = conf().get('hot_reload', False)
        try:
@@ -119,7 +125,7 @@ class WechatChannel(Channel):
                other_user_id = from_user_id
        create_time = msg['CreateTime']             # 消息时间
        match_prefix = check_prefix(content, conf().get('single_chat_prefix'))
-        if conf().get('hot_reload') == True and int(create_time) < int(time.time()) - 60:    #跳过1分钟前的历史消息
+        if conf().get('hot_reload') == True and int(create_time) < int(time.time()) - 60:  # 跳过1分钟前的历史消息
            logger.debug("[WX]history message skipped")
            return
        if "」\n- - - - - - - - - - - - - - -" in content:
@@ -130,7 +136,8 @@ class WechatChannel(Channel):
        elif match_prefix is None:
            return
        context = Context()
-        context.kwargs = {'isgroup': False, 'msg': msg, 'receiver': other_user_id, 'session_id': other_user_id}
+        context.kwargs = {'isgroup': False, 'msg': msg,
+                          'receiver': other_user_id, 'session_id': other_user_id}

        img_match_prefix = check_prefix(content, conf().get('image_create_prefix'))
        if img_match_prefix:
@@ -148,7 +155,7 @@ class WechatChannel(Channel):
        group_name = msg['User'].get('NickName', None)
        group_id = msg['User'].get('UserName', None)
        create_time = msg['CreateTime']             # 消息时间
-        if conf().get('hot_reload') == True and int(create_time) < int(time.time()) - 60:    #跳过1分钟前的历史消息
+        if conf().get('hot_reload') == True and int(create_time) < int(time.time()) - 60:  # 跳过1分钟前的历史消息
            logger.debug("[WX]history group message skipped")
            return
        if not group_name:
@@ -166,11 +173,11 @@ class WechatChannel(Channel):
            return ""
        config = conf()
        match_prefix = (msg['IsAt'] and not config.get("group_at_off", False)) or check_prefix(origin_content, config.get('group_chat_prefix')) \
-                       or check_contain(origin_content, config.get('group_chat_keyword'))
+            or check_contain(origin_content, config.get('group_chat_keyword'))
        if ('ALL_GROUP' in config.get('group_name_white_list') or group_name in config.get('group_name_white_list') or check_contain(group_name, config.get('group_name_keyword_white_list'))) and match_prefix:
            context = Context()
            context.kwargs = { 'isgroup': True, 'msg': msg, 'receiver': group_id}
-            
+
            img_match_prefix = check_prefix(content, conf().get('image_create_prefix'))
            if img_match_prefix:
                content = content.replace(img_match_prefix, '', 1).strip()
@@ -217,7 +224,7 @@ class WechatChannel(Channel):
            thread_pool.submit(self.handle, context).add_done_callback(thread_pool_callback)

    # 统一的发送函数，每个Channel自行实现，根据reply的type字段发送不同类型的消息
-    def send(self, reply : Reply, receiver):
+    def send(self, reply: Reply, receiver):
        if reply.type == ReplyType.TEXT:
            itchat.send(reply.content, toUserName=receiver)
            logger.info('[WX] sendMsg={}, receiver={}'.format(reply, receiver))
@@ -250,9 +257,10 @@ class WechatChannel(Channel):
        reply = Reply()

        logger.debug('[WX] ready to handle context: {}'.format(context))
-        
+
        # reply的构建步骤
-        e_context = PluginManager().emit_event(EventContext(Event.ON_HANDLE_CONTEXT, {'channel' : self, 'context': context, 'reply': reply}))
+        e_context = PluginManager().emit_event(EventContext(Event.ON_HANDLE_CONTEXT, {
+            'channel': self, 'context': context, 'reply': reply}))
        reply = e_context['reply']
        if not e_context.is_pass():
            logger.debug('[WX] ready to handle context: type={}, content={}'.format(context.type, context.content))
@@ -260,22 +268,31 @@ class WechatChannel(Channel):
                reply = super().build_reply_content(context.content, context)
            elif context.type == ContextType.VOICE: # 语音消息
                msg = context['msg']
-                file_name = TmpDir().path() + context.content
-                msg.download(file_name)
-                reply = super().build_voice_to_text(file_name)
-                if reply.type == ReplyType.TEXT:
-                    content = reply.content # 语音转文字后，将文字内容作为新的context
-                    # 如果是群消息，判断是否触发关键字
-                    if context['isgroup']:
+                mp3_path = TmpDir().path() + context.content
+                msg.download(mp3_path)
+                # mp3转wav
+                wav_path = os.path.splitext(mp3_path)[0] + '.wav'
+                mp3_to_wav(mp3_path=mp3_path, wav_path=wav_path)
+                # 语音识别
+                reply = super().build_voice_to_text(wav_path)
+                # 删除临时文件
+                os.remove(wav_path)
+                os.remove(mp3_path)
+                if reply.type != ReplyType.ERROR and reply.type != ReplyType.INFO:
+                    content = reply.content  # 语音转文字后，将文字内容作为新的context
+                    context.type = ContextType.TEXT
+                    if context["isgroup"]:
+                        # 校验关键字
                        match_prefix = check_prefix(content, conf().get('group_chat_prefix'))
                        match_contain = check_contain(content, conf().get('group_chat_keyword'))
-                        logger.debug('[WX] group chat prefix match: {}'.format(match_prefix))
-                        if match_prefix is None and match_contain is None:
-                            return
-                        else:
+                        if match_prefix is not None or match_contain is not None:
+                            # 判断如果匹配到自定义前缀，则返回过滤掉前缀+空格后的内容，用于实现类似自定义+前缀触发生成AI图片的功能
                            if match_prefix:
                                content = content.replace(match_prefix, '', 1).strip()
-                        
+                        else:
+                            logger.info("[WX]receive voice, checkprefix didn't match")
+                            return
+                       
                    img_match_prefix = check_prefix(content, conf().get('image_create_prefix'))
                    if img_match_prefix:
                        content = content.replace(img_match_prefix, '', 1).strip()
@@ -292,11 +309,12 @@ class WechatChannel(Channel):
                return

        logger.debug('[WX] ready to decorate reply: {}'.format(reply))
-        
+
        # reply的包装步骤
        if reply and reply.type:
-            e_context = PluginManager().emit_event(EventContext(Event.ON_DECORATE_REPLY, {'channel' : self, 'context': context, 'reply': reply}))
-            reply=e_context['reply']
+            e_context = PluginManager().emit_event(EventContext(Event.ON_DECORATE_REPLY, {
+                'channel': self, 'context': context, 'reply': reply}))
+            reply = e_context['reply']
            if not e_context.is_pass() and reply and reply.type:
                if reply.type == ReplyType.TEXT:
                    reply_text = reply.content
@@ -314,10 +332,11 @@ class WechatChannel(Channel):
                    logger.error('[WX] unknown reply type: {}'.format(reply.type))
                    return

-        # reply的发送步骤   
+        # reply的发送步骤
        if reply and reply.type:
-            e_context = PluginManager().emit_event(EventContext(Event.ON_SEND_REPLY, {'channel' : self, 'context': context, 'reply': reply}))
-            reply=e_context['reply']
+            e_context = PluginManager().emit_event(EventContext(Event.ON_SEND_REPLY, {
+                'channel': self, 'context': context, 'reply': reply}))
+            reply = e_context['reply']
            if not e_context.is_pass() and reply and reply.type:
                logger.debug('[WX] ready to send reply: {} to {}'.format(reply, context['receiver']))
                self.send(reply, context['receiver'])
--- a/channel/wechat/wechaty_channel.py
+++ b/channel/wechat/wechaty_channel.py
@@ -4,25 +4,19 @@
 wechaty channel
 Python Wechaty - https://github.com/wechaty/python-wechaty
 """
-import io
 import os
-import json
 import time
 import asyncio
-import requests
-import pysilk
-import wave
-from pydub import AudioSegment
 from typing import Optional, Union
 from bridge.context import Context, ContextType
 from wechaty_puppet import MessageType, FileBox, ScanStatus  # type: ignore
 from wechaty import Wechaty, Contact
-from wechaty.user import Message, Room, MiniProgram, UrlLink
+from wechaty.user import Message, MiniProgram, UrlLink
 from channel.channel import Channel
 from common.log import logger
 from common.tmp_dir import TmpDir
 from config import conf
-
+from voice.audio_convert import sil_to_wav, mp3_to_sil

 class WechatyChannel(Channel):

@@ -50,8 +44,9 @@ class WechatyChannel(Channel):

    async def on_scan(self, status: ScanStatus, qr_code: Optional[str] = None,
                      data: Optional[str] = None):
-        contact = self.Contact.load(self.contact_id)
-        logger.info('[WX] scan user={}, scan status={}, scan qr_code={}'.format(contact, status.name, qr_code))
+        pass
+        # contact = self.Contact.load(self.contact_id)
+        # logger.info('[WX] scan user={}, scan status={}, scan qr_code={}'.format(contact, status.name, qr_code))
        # print(f'user <{contact}> scan status: {status.name} , 'f'qr_code: {qr_code}')

    async def on_message(self, msg: Message):
@@ -67,7 +62,7 @@ class WechatyChannel(Channel):
        content = msg.text()
        mention_content = await msg.mention_text()  # 返回过滤掉@name后的消息
        match_prefix = self.check_prefix(content, conf().get('single_chat_prefix'))
-        conversation: Union[Room, Contact] = from_contact if room is None else room
+        # conversation: Union[Room, Contact] = from_contact if room is None else room

        if room is None and msg.type() == MessageType.MESSAGE_TYPE_TEXT:
            if not msg.is_self() and match_prefix is not None:
@@ -102,21 +97,8 @@ class WechatyChannel(Channel):
                await voice_file.to_file(silk_file)
                logger.info("[WX]receive voice file: " + silk_file)
                # 将文件转成wav格式音频
-                wav_file = silk_file.replace(".slk", ".wav")
-                with open(silk_file, 'rb') as f:
-                    silk_data = f.read()
-                pcm_data = pysilk.decode(silk_data)
-
-                with wave.open(wav_file, 'wb') as wav_data:
-                    wav_data.setnchannels(1)
-                    wav_data.setsampwidth(2)
-                    wav_data.setframerate(24000)
-                    wav_data.writeframes(pcm_data)
-                if os.path.exists(wav_file): 
-                    converter_state = "true" # 转换wav成功
-                else:
-                    converter_state = "false" # 转换wav失败
-                logger.info("[WX]receive voice converter: " + converter_state)
+                wav_file = os.path.splitext(silk_file)[0] + '.wav'
+                sil_to_wav(silk_file, wav_file)
                # 语音识别为文本
                query = super().build_voice_to_text(wav_file).content
                # 交验关键字
@@ -183,21 +165,8 @@ class WechatyChannel(Channel):
                await voice_file.to_file(silk_file)
                logger.info("[WX]receive voice file: " + silk_file)
                # 将文件转成wav格式音频
-                wav_file = silk_file.replace(".slk", ".wav")
-                with open(silk_file, 'rb') as f:
-                    silk_data = f.read()
-                pcm_data = pysilk.decode(silk_data)
-
-                with wave.open(wav_file, 'wb') as wav_data:
-                    wav_data.setnchannels(1)
-                    wav_data.setsampwidth(2)
-                    wav_data.setframerate(24000)
-                    wav_data.writeframes(pcm_data)
-                if os.path.exists(wav_file): 
-                    converter_state = "true" # 转换wav成功
-                else:
-                    converter_state = "false" # 转换wav失败
-                logger.info("[WX]receive voice converter: " + converter_state)
+                wav_file = os.path.splitext(silk_file)[0] + '.wav'
+                sil_to_wav(silk_file, wav_file)
                # 语音识别为文本
                query = super().build_voice_to_text(wav_file).content
                # 校验关键字
@@ -260,21 +229,12 @@ class WechatyChannel(Channel):
            if reply_text:
                # 转换 mp3 文件为 silk 格式
                mp3_file = super().build_text_to_voice(reply_text).content
-                silk_file = mp3_file.replace(".mp3", ".silk")
-                # Load the MP3 file
-                audio = AudioSegment.from_file(mp3_file, format="mp3")
-                # Convert to WAV format
-                audio = audio.set_frame_rate(24000).set_channels(1)
-                wav_data = audio.raw_data
-                sample_width = audio.sample_width
-                # Encode to SILK format
-                silk_data = pysilk.encode(wav_data, 24000)
-                # Save the silk file
-                with open(silk_file, "wb") as f:
-                    f.write(silk_data)
+                silk_file = os.path.splitext(mp3_file)[0] + '.sil'
+                voiceLength = mp3_to_sil(mp3_file, silk_file)
                # 发送语音
                t = int(time.time())
-                file_box = FileBox.from_file(silk_file, name=str(t) + '.silk')
+                file_box = FileBox.from_file(silk_file, name=str(t) + '.sil')
+                file_box.metadata = {'voiceLength': voiceLength}                
                await self.send(file_box, reply_user_id)
                # 清除缓存文件
                os.remove(mp3_file)
@@ -337,21 +297,12 @@ class WechatyChannel(Channel):
            reply_text = '@' + group_user_name + ' ' + reply_text.strip()
            # 转换 mp3 文件为 silk 格式
            mp3_file = super().build_text_to_voice(reply_text).content
-            silk_file = mp3_file.replace(".mp3", ".silk")
-            # Load the MP3 file
-            audio = AudioSegment.from_file(mp3_file, format="mp3")
-            # Convert to WAV format
-            audio = audio.set_frame_rate(24000).set_channels(1)
-            wav_data = audio.raw_data
-            sample_width = audio.sample_width
-            # Encode to SILK format
-            silk_data = pysilk.encode(wav_data, 24000)
-            # Save the silk file
-            with open(silk_file, "wb") as f:
-                f.write(silk_data)
+            silk_file = os.path.splitext(mp3_file)[0] + '.sil'
+            voiceLength = mp3_to_sil(mp3_file, silk_file)
            # 发送语音
            t = int(time.time())
            file_box = FileBox.from_file(silk_file, name=str(t) + '.silk')
+            file_box.metadata = {'voiceLength': voiceLength}            
            await self.send_group(file_box, group_id)
            # 清除缓存文件
            os.remove(mp3_file)
--- a/config.py
+++ b/config.py
@@ -5,71 +5,77 @@ import os
 from common.log import logger

 # 将所有可用的配置项写在字典里, 请使用小写字母
-available_setting ={
-    #openai api配置
-    "open_ai_api_key": "", # openai api key
-    "open_ai_api_base": "https://api.openai.com/v1", # openai apibase，当use_azure_chatgpt为true时，需要设置对应的api base
-    "proxy": "", # openai使用的代理
-    "model": "gpt-3.5-turbo", # chatgpt模型， 当use_azure_chatgpt为true时，其名称为Azure上model deployment名称
-    "use_azure_chatgpt": False, # 是否使用azure的chatgpt
+available_setting = {
+    # openai api配置
+    "open_ai_api_key": "",  # openai api key
+    # openai apibase，当use_azure_chatgpt为true时，需要设置对应的api base
+    "open_ai_api_base": "https://api.openai.com/v1",
+    "proxy": "",  # openai使用的代理
+    # chatgpt模型， 当use_azure_chatgpt为true时，其名称为Azure上model deployment名称
+    "model": "gpt-3.5-turbo",
+    "use_azure_chatgpt": False,  # 是否使用azure的chatgpt

-    #Bot触发配置
-    "single_chat_prefix": ["bot", "@bot"], # 私聊时文本需要包含该前缀才能触发机器人回复
-    "single_chat_reply_prefix": "[bot] ", # 私聊时自动回复的前缀，用于区分真人
-    "group_chat_prefix": ["@bot"], # 群聊时包含该前缀则会触发机器人回复
-    "group_chat_reply_prefix": "", # 群聊时自动回复的前缀
-    "group_chat_keyword": [], # 群聊时包含该关键词则会触发机器人回复
-    "group_at_off": False, # 是否关闭群聊时@bot的触发
-    "group_name_white_list": ["ChatGPT测试群", "ChatGPT测试群2"], # 开启自动回复的群名称列表
-    "group_name_keyword_white_list": [], # 开启自动回复的群名称关键词列表
-    "group_chat_in_one_session": ["ChatGPT测试群"], # 支持会话上下文共享的群名称
-    "image_create_prefix": ["画", "看", "找"], # 开启图片回复的前缀
-    
-    #chatgpt会话参数
-    "expires_in_seconds": 3600, # 无操作会话的过期时间
-    "character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题，并且可以使用多种语言与人交流。", # 人格描述
-    "conversation_max_tokens": 1000, # 支持上下文记忆的最多字符数
-            
-    #chatgpt限流配置
-    "rate_limit_chatgpt": 20, # chatgpt的调用频率限制
-    "rate_limit_dalle": 50, # openai dalle的调用频率限制
+    # Bot触发配置
+    "single_chat_prefix": ["bot", "@bot"],  # 私聊时文本需要包含该前缀才能触发机器人回复
+    "single_chat_reply_prefix": "[bot] ",  # 私聊时自动回复的前缀，用于区分真人
+    "group_chat_prefix": ["@bot"],  # 群聊时包含该前缀则会触发机器人回复
+    "group_chat_reply_prefix": "",  # 群聊时自动回复的前缀
+    "group_chat_keyword": [],  # 群聊时包含该关键词则会触发机器人回复
+    "group_at_off": False,  # 是否关闭群聊时@bot的触发
+    "group_name_white_list": ["ChatGPT测试群", "ChatGPT测试群2"],  # 开启自动回复的群名称列表
+    "group_name_keyword_white_list": [],  # 开启自动回复的群名称关键词列表
+    "group_chat_in_one_session": ["ChatGPT测试群"],  # 支持会话上下文共享的群名称
+    "image_create_prefix": ["画", "看", "找"],  # 开启图片回复的前缀
+
+    # chatgpt会话参数
+    "expires_in_seconds": 3600,  # 无操作会话的过期时间
+    "character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题，并且可以使用多种语言与人交流。",  # 人格描述
+    "conversation_max_tokens": 1000,  # 支持上下文记忆的最多字符数
+
+    # chatgpt限流配置
+    "rate_limit_chatgpt": 20,  # chatgpt的调用频率限制
+    "rate_limit_dalle": 50,  # openai dalle的调用频率限制


-    #chatgpt api参数 参考https://platform.openai.com/docs/api-reference/chat/create
+    # chatgpt api参数 参考https://platform.openai.com/docs/api-reference/chat/create
    "temperature": 0.9,
    "top_p": 1,
    "frequency_penalty": 0,
    "presence_penalty": 0,

-    #语音设置
-    "speech_recognition": False, # 是否开启语音识别
-    "group_speech_recognition": False, # 是否开启群组语音识别
-    "voice_reply_voice": False, # 是否使用语音回复语音，需要设置对应语音合成引擎的api key
-    "voice_to_text": "openai", # 语音识别引擎，支持openai和google
-    "text_to_voice": "baidu", # 语音合成引擎，支持baidu和google
+    # 语音设置
+    "speech_recognition": False,  # 是否开启语音识别
+    "group_speech_recognition": False,  # 是否开启群组语音识别
+    "voice_reply_voice": False,  # 是否使用语音回复语音，需要设置对应语音合成引擎的api key
+    "voice_to_text": "openai",  # 语音识别引擎，支持openai,google
+    "text_to_voice": "baidu",  # 语音合成引擎，支持baidu,google,pytts(offline)

    # baidu api的配置， 使用百度语音识别和语音合成时需要
-    'baidu_app_id': "",
-    'baidu_api_key': "",
-    'baidu_secret_key': "",
+    "baidu_app_id": "",
+    "baidu_api_key": "",
+    "baidu_secret_key": "",
+    # 1536普通话(支持简单的英文识别) 1737英语 1637粤语 1837四川话 1936普通话远场
+    "baidu_dev_pid": "1536",

-    #服务时间限制，目前支持itchat
-    "chat_time_module": False, # 是否开启服务时间限制
-    "chat_start_time": "00:00", # 服务开始时间
-    "chat_stop_time": "24:00", # 服务结束时间
+    # 服务时间限制，目前支持itchat
+    "chat_time_module": False,  # 是否开启服务时间限制
+    "chat_start_time": "00:00",  # 服务开始时间
+    "chat_stop_time": "24:00",  # 服务结束时间

    # itchat的配置
-    "hot_reload": False, # 是否开启热重载
+    "hot_reload": False,  # 是否开启热重载

    # wechaty的配置
-    "wechaty_puppet_service_token": "", # wechaty的token
+    "wechaty_puppet_service_token": "",  # wechaty的token

    # chatgpt指令自定义触发词
-    "clear_memory_commands": ['#清除记忆'], # 重置会话指令
+    "clear_memory_commands": ['#清除记忆'],  # 重置会话指令
+    "channel_type": "wx", # 通道类型，支持wx,wxy和terminal


 }

+
 class Config(dict):
    def __getitem__(self, key):
        if key not in available_setting:
@@ -82,15 +88,17 @@ class Config(dict):
        return super().__setitem__(key, value)

    def get(self, key, default=None):
-        try :
+        try:
            return self[key]
        except KeyError as e:
            return default
        except Exception as e:
            raise e
-    
+
+
 config = Config()

+
 def load_config():
    global config
    config_path = "./config.json"
@@ -109,7 +117,8 @@ def load_config():
    for name, value in os.environ.items():
        name = name.lower()
        if name in available_setting:
-            logger.info("[INIT] override config by environ args: {}={}".format(name, value))
+            logger.info(
+                "[INIT] override config by environ args: {}={}".format(name, value))
            try:
                config[name] = eval(value)
            except:
@@ -118,9 +127,8 @@ def load_config():
    logger.info("[INIT] load config: {}".format(config))


-
 def get_root():
-    return os.path.dirname(os.path.abspath( __file__ ))
+    return os.path.dirname(os.path.abspath(__file__))


 def read_file(path):
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,15 @@
-itchat-uos==1.5.0.dev0
-openai
-wechaty
+openai>=0.27.2
+baidu_aip>=4.16.10
+gTTS>=2.3.1
+HTMLParser>=0.0.2
+pydub>=0.25.1
+PyQRCode>=1.2.1
+pysilk>=0.0.1
+pysilk_mod>=1.6.0
+pyttsx3>=2.90
+requests>=2.28.2
+SpeechRecognition>=3.10.0
+tiktoken>=0.3.2
+webuiapi>=0.6.2
+wechaty>=0.10.7
+wechaty_puppet>=0.4.23
--- a/voice/audio_convert.py
+++ b/voice/audio_convert.py
@@ -0,0 +1,60 @@
+import wave
+import pysilk
+from pydub import AudioSegment
+
+
+def get_pcm_from_wav(wav_path):
+    """
+    Read the raw PCM frames out of a wav file, closing the file afterwards.
+
+    :param wav_path: path of the wav file
+    :returns: all audio frames of the file as bytes
+    """
+    with wave.open(wav_path, "rb") as wav:
+        return wav.readframes(wav.getnframes())
+
+
+def mp3_to_wav(mp3_path, wav_path):
+    """
+    Decode the mp3 at mp3_path with pydub and export it to wav_path as wav.
+    """
+    audio = AudioSegment.from_mp3(mp3_path)
+    audio.export(wav_path, format="wav")
+
+
+def pcm_to_silk(pcm_path, silk_path):
+    """
+    Encode a wav file (pcm_path is opened with AudioSegment.from_wav) to silk.
+    :returns: duration of the audio in milliseconds
+    """
+    audio = AudioSegment.from_wav(pcm_path)
+    wav_data = audio.raw_data
+    silk_data = pysilk.encode(
+        wav_data, data_rate=audio.frame_rate, sample_rate=audio.frame_rate)
+    with open(silk_path, "wb") as f:
+        f.write(silk_data)
+    return audio.duration_seconds * 1000
+
+
+def mp3_to_sil(mp3_path, silk_path):
+    """
+    Encode an mp3 file to silk at the mp3's own frame rate (no resampling here).
+    :returns: duration of the audio in milliseconds
+    """
+    audio = AudioSegment.from_mp3(mp3_path)
+    wav_data = audio.raw_data
+    silk_data = pysilk.encode(
+        wav_data, data_rate=audio.frame_rate, sample_rate=audio.frame_rate)
+    # Save the silk file
+    with open(silk_path, "wb") as f:
+        f.write(silk_data)
+    return audio.duration_seconds * 1000
+
+
+def sil_to_wav(silk_path, wav_path, rate: int = 24000):
+    """
+    Decode a silk file to wav data (sample_rate=rate) and write it to wav_path.
+    """
+    wav_data = pysilk.decode_file(silk_path, to_wav=True, sample_rate=rate)
+    with open(wav_path, "wb") as f:
+        f.write(wav_data)
--- a/voice/baidu/baidu_voice.py
+++ b/voice/baidu/baidu_voice.py
@@ -8,19 +8,53 @@ from bridge.reply import Reply, ReplyType
 from common.log import logger
 from common.tmp_dir import TmpDir
 from voice.voice import Voice
+from voice.audio_convert import get_pcm_from_wav
 from config import conf
+"""
+    百度的语音识别API.
+    dev_pid:
+        - 1936: 普通话远场
+        - 1536：普通话(支持简单的英文识别)
+        - 1537：普通话(纯中文识别)
+        - 1737：英语
+        - 1637：粤语
+        - 1837：四川话
+    要使用本模块, 首先到 yuyin.baidu.com 注册一个开发者账号,
+    之后创建一个新应用, 然后在应用管理的"查看key"中获得 API Key 和 Secret Key
+    填入 config.json 中.
+        baidu_app_id: ''
+        baidu_api_key: ''
+        baidu_secret_key: ''
+        baidu_dev_pid: '1536'
+"""
+

 class BaiduVoice(Voice):
    APP_ID = conf().get('baidu_app_id')
    API_KEY = conf().get('baidu_api_key')
    SECRET_KEY = conf().get('baidu_secret_key')
+    DEV_ID = conf().get('baidu_dev_pid')
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
-    
+
    def __init__(self):
        pass

    def voiceToText(self, voice_file):
-        pass
+        # 识别本地文件
+        logger.debug('[Baidu] voice file name={}'.format(voice_file))
+        pcm = get_pcm_from_wav(voice_file)
+        res = self.client.asr(pcm, "pcm", 16000, {"dev_pid": self.DEV_ID})
+        if res["err_no"] == 0:
+            logger.info("百度语音识别到了：{}".format(res["result"]))
+            text = "".join(res["result"])
+            reply = Reply(ReplyType.TEXT, text)
+        else:
+            logger.info("百度语音识别出错了: {}".format(res["err_msg"]))
+            if res["err_msg"] == "request pv too much":
+                logger.info("  出现这个原因很可能是你的百度语音服务调用量超出限制，或未开通付费")
+            reply = Reply(ReplyType.ERROR,
+                          "百度语音识别出错了；{0}".format(res["err_msg"]))
+        return reply

    def textToVoice(self, text):
        result = self.client.synthesis(text, 'zh', 1, {
@@ -30,7 +64,8 @@ class BaiduVoice(Voice):
            fileName = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
            with open(fileName, 'wb') as f:
                f.write(result)
-            logger.info('[Baidu] textToVoice text={} voice file name={}'.format(text, fileName))
+            logger.info(
+                '[Baidu] textToVoice text={} voice file name={}'.format(text, fileName))
            reply = Reply(ReplyType.VOICE, fileName)
        else:
            logger.error('[Baidu] textToVoice error={}'.format(result))
--- a/voice/google/google_voice.py
+++ b/voice/google/google_voice.py
@@ -3,12 +3,10 @@
 google voice service
 """

-import pathlib
-import subprocess
 import time
-from bridge.reply import Reply, ReplyType
 import speech_recognition
-import pyttsx3
+from gtts import gTTS
+from bridge.reply import Reply, ReplyType
 from common.log import logger
 from common.tmp_dir import TmpDir
 from voice.voice import Voice
@@ -16,22 +14,12 @@ from voice.voice import Voice

 class GoogleVoice(Voice):
    recognizer = speech_recognition.Recognizer()
-    engine = pyttsx3.init()

    def __init__(self):
-        # 语速
-        self.engine.setProperty('rate', 125)
-        # 音量
-        self.engine.setProperty('volume', 1.0)
-        # 0为男声，1为女声
-        voices = self.engine.getProperty('voices')
-        self.engine.setProperty('voice', voices[1].id)
+        pass

    def voiceToText(self, voice_file):
-        new_file = voice_file.replace('.mp3', '.wav')
-        subprocess.call('ffmpeg -i ' + voice_file +
-                        ' -acodec pcm_s16le -ac 1 -ar 16000 ' + new_file, shell=True)
-        with speech_recognition.AudioFile(new_file) as source:
+        with speech_recognition.AudioFile(voice_file) as source:
            audio = self.recognizer.record(source)
        try:
            text = self.recognizer.recognize_google(audio, language='zh-CN')
@@ -46,12 +34,12 @@ class GoogleVoice(Voice):
            return reply
    def textToVoice(self, text):
        try:
-            textFile = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
-            self.engine.save_to_file(text, textFile)
-            self.engine.runAndWait()
+            mp3File = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
+            tts = gTTS(text=text, lang='zh')
+            tts.save(mp3File)            
            logger.info(
-                '[Google] textToVoice text={} voice file name={}'.format(text, textFile))
-            reply = Reply(ReplyType.VOICE, textFile)
+                '[Google] textToVoice text={} voice file name={}'.format(text, mp3File))
+            reply = Reply(ReplyType.VOICE, mp3File)
        except Exception as e:
            reply = Reply(ReplyType.ERROR, str(e))
        finally:
--- a/voice/openai/openai_voice.py
+++ b/voice/openai/openai_voice.py
@@ -28,6 +28,3 @@ class OpenaiVoice(Voice):
            reply = Reply(ReplyType.ERROR, str(e))
        finally:
            return reply
-
-    def textToVoice(self, text):
-        pass
--- a/voice/pytts/pytts_voice.py
+++ b/voice/pytts/pytts_voice.py
@@ -0,0 +1,37 @@
+
+"""
+pytts voice service (offline)
+"""
+
+import time
+import pyttsx3
+from bridge.reply import Reply, ReplyType
+from common.log import logger
+from common.tmp_dir import TmpDir
+from voice.voice import Voice
+
+
+class PyttsVoice(Voice):  # text-to-speech voice backed by a pyttsx3 engine
+    engine = pyttsx3.init()  # created once in the class body, shared by all instances
+
+    def __init__(self):
+        # Speaking rate
+        self.engine.setProperty('rate', 125)
+        # Volume
+        self.engine.setProperty('volume', 1.0)
+        for voice in self.engine.getProperty('voices'):
+            if "Chinese" in voice.name:  # no break, so the last matching voice is the one kept
+                self.engine.setProperty('voice', voice.id)
+
+    def textToVoice(self, text):  # returns a Reply: VOICE with the file path, or ERROR with the message
+        try:
+            mp3File = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
+            self.engine.save_to_file(text, mp3File)
+            self.engine.runAndWait()
+            logger.info(
+                '[Pytts] textToVoice text={} voice file name={}'.format(text, mp3File))
+            reply = Reply(ReplyType.VOICE, mp3File)
+        except Exception as e:
+            reply = Reply(ReplyType.ERROR, str(e))
+        finally:  # return inside finally also discards any exception not caught above
+            return reply
--- a/voice/voice_factory.py
+++ b/voice/voice_factory.py
@@ -17,4 +17,7 @@ def create_voice(voice_type):
    elif voice_type == 'openai':
        from voice.openai.openai_voice import OpenaiVoice
        return OpenaiVoice()
+    elif voice_type == 'pytts':
+        from voice.pytts.pytts_voice import PyttsVoice
+        return PyttsVoice()
    raise RuntimeError