# chatgpt-on-wechat/voice/tencent/tencent_voice.py

import json
import base64
import os
import time
from voice.voice import Voice
from common.log import logger
from tencentcloud.common import credential
from tencentcloud.asr.v20190614 import asr_client, models as asr_models
from tencentcloud.tts.v20190823 import tts_client, models as tts_models
from bridge.reply import Reply, ReplyType
from common.tmp_dir import TmpDir

class TencentVoice(Voice):
    """Voice engine backed by Tencent Cloud ASR (speech-to-text) and TTS.

    The API credentials and the TTS voice type are read from a ``config.json``
    file located in the same directory as this module.
    """

    # Region passed to both the ASR and the TTS client.
    _REGION = "ap-guangzhou"

    # File extension -> ``VoiceFormat`` value sent to SentenceRecognition.
    # Extensions not listed here fall back to "wav" (the previous hard-coded
    # value), so existing callers that pass wav files are unaffected.
    _VOICE_FORMATS = {
        "wav": "wav",
        "pcm": "pcm",
        "mp3": "mp3",
        "m4a": "m4a",
        "aac": "aac",
        "amr": "amr",
        "silk": "silk",
    }

    def __init__(self):
        super().__init__()
        self.secret_id = None
        self.secret_key = None
        # Default TTS voice; may be overridden by "voice_type" in config.json.
        self.voice_type = 1003
        self._load_config()

    def _load_config(self):
        """Load credentials and the voice type from the local config file.

        This is best-effort: any failure (missing file, invalid JSON, ...) is
        logged and the instance keeps its defaults, so construction never
        raises because of configuration problems.
        """
        try:
            config_path = os.path.join(os.path.dirname(__file__), 'config.json')
            # JSON is UTF-8 by specification; do not rely on the platform's
            # default encoding.
            with open(config_path, 'r', encoding='utf-8') as f:
                config = json.load(f)
            self.secret_id = config.get('secret_id')
            self.secret_key = config.get('secret_key')
            # An explicit null in the config must not wipe out the default.
            voice_type = config.get('voice_type')
            if voice_type is not None:
                self.voice_type = voice_type
            if not self.secret_id or not self.secret_key:
                logger.error("[Tencent] Missing credentials in config.json")
        except Exception as e:
            logger.error(f"[Tencent] Failed to load config: {e}")

    def setup(self, config):
        """No-op kept for backward compatibility.

        Configuration is loaded from ``config.json`` in ``__init__``; the
        ``config`` argument is ignored.
        """
        pass

    def _build_credential(self):
        """Return an SDK credential object built from the loaded secrets.

        Raises:
            ValueError: if the secret id or secret key is missing. Callers
                invoke this inside their ``try`` block so the failure is
                reported as an error ``Reply`` without contacting the service.
        """
        if not self.secret_id or not self.secret_key:
            raise ValueError("Missing credentials in config.json")
        return credential.Credential(self.secret_id, self.secret_key)

    def _voice_format(self, voice_file):
        """Return the ``VoiceFormat`` value for ``voice_file`` based on its extension."""
        extension = os.path.splitext(voice_file)[1].lstrip('.').lower()
        return self._VOICE_FORMATS.get(extension, "wav")

    def voiceToText(self, voice_file):
        """Transcribe an audio file with Tencent sentence recognition.

        Args:
            voice_file: Path of the audio file to transcribe.

        Returns:
            A ``Reply`` of type ``TEXT`` holding the recognized text, or a
            ``Reply`` of type ``ERROR`` if recognition returned nothing or any
            step failed. This method does not raise.
        """
        try:
            cred = self._build_credential()
            client = asr_client.AsrClient(cred, self._REGION)

            with open(voice_file, 'rb') as f:
                audio_data = f.read()

            req = asr_models.SentenceRecognitionRequest()
            req.ProjectId = 0
            req.SubServiceType = 2
            # NOTE: "EngSerViceType" is the parameter's actual spelling in the
            # Tencent API; it is not a typo to be corrected.
            req.EngSerViceType = "16k_zh"
            # SourceType 1: the audio is sent inline in ``Data`` (base64).
            req.SourceType = 1
            req.VoiceFormat = self._voice_format(voice_file)
            req.UsrAudioKey = "voice_recognition"
            req.Data = base64.b64encode(audio_data).decode('utf-8')
            # The API documents DataLen (length of the raw, un-encoded audio
            # in bytes) as required when the audio is uploaded inline.
            req.DataLen = len(audio_data)

            resp = client.SentenceRecognition(req)

            if resp.Result:
                logger.info("[Tencent] Voice to text success: {}".format(resp.Result))
                return Reply(ReplyType.TEXT, resp.Result)
            else:
                logger.warning("[Tencent] Voice to text failed")
                return Reply(ReplyType.ERROR, "腾讯语音识别失败")

        except Exception as e:
            logger.error("[Tencent] Voice to text error: {}".format(e))
            return Reply(ReplyType.ERROR, "腾讯语音识别出错：{}".format(str(e)))

    def textToVoice(self, text):
        """Synthesize speech for ``text`` and store it as an mp3 file.

        Args:
            text: The text to convert to speech.

        Returns:
            A ``Reply`` of type ``VOICE`` holding the path of the generated
            audio file, or a ``Reply`` of type ``ERROR`` if synthesis returned
            no audio or any step failed. This method does not raise.
        """
        try:
            cred = self._build_credential()
            client = tts_client.TtsClient(cred, self._REGION)

            req = tts_models.TextToVoiceRequest()
            req.Text = text
            req.SessionId = str(int(time.time()))
            req.Volume = 5
            req.Speed = 0
            req.ProjectId = 0
            req.ModelType = 1
            req.PrimaryLanguage = 1
            req.SampleRate = 16000
            req.VoiceType = self.voice_type
            # The service returns WAV unless a codec is requested; ask for mp3
            # explicitly so the bytes match the ".mp3" file written below.
            req.Codec = "mp3"

            response = client.TextToVoice(req)

            if response.Audio:
                # Timestamp plus a non-negative hash of the text keeps names
                # distinct for different replies produced in the same second.
                fileName = TmpDir().path() + "reply-" + str(int(time.time())) + "-" + str(hash(text) & 0x7FFFFFFF) + ".mp3"
                with open(fileName, "wb") as f:
                    f.write(base64.b64decode(response.Audio))
                logger.info("[Tencent] textToVoice text={} voice file name={}".format(text, fileName))
                return Reply(ReplyType.VOICE, fileName)
            else:
                logger.error("[Tencent] textToVoice failed")
                return Reply(ReplyType.ERROR, "腾讯语音合成失败")

        except Exception as e:
            logger.error("[Tencent] Text to voice error: {}".format(e))
            return Reply(ReplyType.ERROR, "腾讯语音合成出错：{}".format(str(e)))