From 814ce7a43b949fa3849319d83a986fa86a8b6cad Mon Sep 17 00:00:00 2001
From: thzjy <thzjy@163.com>
Date: Sun, 18 May 2025 17:32:17 +0800
Subject: [PATCH 1/2] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E7=99=BE=E5=BA=A6?=
 =?UTF-8?q?=E8=AF=AD=E9=9F=B3=E5=90=88=E6=88=90=E9=95=BF=E6=96=87=E5=A4=84?=
 =?UTF-8?q?=E7=90=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 voice/baidu/baidu_voice.py | 182 ++++++++++++++++++++++++++-----------
 1 file changed, 130 insertions(+), 52 deletions(-)

diff --git a/voice/baidu/baidu_voice.py b/voice/baidu/baidu_voice.py
index fbf53ce..4fa6a56 100644
--- a/voice/baidu/baidu_voice.py
+++ b/voice/baidu/baidu_voice.py
@@ -1,9 +1,11 @@
 """
-baidu voice service
+baidu voice service with thread-safe token caching
 """
 import json
 import os
 import time
+import threading
+import requests
 
 from aip import AipSpeech
 
@@ -14,28 +16,13 @@ from config import conf
 from voice.audio_convert import get_pcm_from_wav
 from voice.voice import Voice
 
-"""
-    百度的语音识别API.
-    dev_pid:
-        - 1936: 普通话远场
-        - 1536：普通话(支持简单的英文识别)
-        - 1537：普通话(纯中文识别)
-        - 1737：英语
-        - 1637：粤语
-        - 1837：四川话
-    要使用本模块, 首先到 yuyin.baidu.com 注册一个开发者账号,
-    之后创建一个新应用, 然后在应用管理的"查看key"中获得 API Key 和 Secret Key
-    然后在 config.json 中填入这两个值, 以及 app_id, dev_pid
-    """
-
-
 class BaiduVoice(Voice):
     def __init__(self):
         try:
+            # 读取本地 TTS 参数配置
             curdir = os.path.dirname(__file__)
             config_path = os.path.join(curdir, "config.json")
-            bconf = None
-            if not os.path.exists(config_path):  # 如果没有配置文件，创建本地配置文件
+            if not os.path.exists(config_path):
                 bconf = {"lang": "zh", "ctp": 1, "spd": 5, "pit": 5, "vol": 5, "per": 0}
                 with open(config_path, "w") as fw:
                     json.dump(bconf, fw, indent=4)
@@ -47,48 +34,139 @@ class BaiduVoice(Voice):
             self.api_key = str(conf().get("baidu_api_key"))
             self.secret_key = str(conf().get("baidu_secret_key"))
             self.dev_id = conf().get("baidu_dev_pid")
-            self.lang = bconf["lang"]
-            self.ctp = bconf["ctp"]
-            self.spd = bconf["spd"]
-            self.pit = bconf["pit"]
-            self.vol = bconf["vol"]
-            self.per = bconf["per"]
 
+            self.lang = bconf["lang"]
+            self.ctp  = bconf["ctp"]
+            self.spd  = bconf["spd"]
+            self.pit  = bconf["pit"]
+            self.vol  = bconf["vol"]
+            self.per  = bconf["per"]
+
+            # 百度 SDK 客户端（短文本合成 & 语音识别）
             self.client = AipSpeech(self.app_id, self.api_key, self.secret_key)
+
+            # access_token 缓存与锁
+            self._access_token    = None
+            self._token_expire_ts = 0
+            self._token_lock      = threading.Lock()
         except Exception as e:
-            logger.warn("BaiduVoice init failed: %s, ignore " % e)
+            logger.warn("BaiduVoice init failed: %s, ignore" % e)
+
+    def _get_access_token(self):
+        """Return the cached Baidu access token, refreshing it under the lock; None if the response has none."""
+        with self._token_lock:
+            now = time.time()
+            if self._access_token and now < self._token_expire_ts:
+                return self._access_token
+            url = "https://aip.baidubce.com/oauth/2.0/token"
+            params = {
+                "grant_type": "client_credentials",
+                "client_id": self.api_key,
+                "client_secret": self.secret_key,
+            }
+            # Bound the request: without a timeout a stalled call would hold the lock indefinitely.
+            resp = requests.post(url, params=params, timeout=10).json()
+            token = resp.get("access_token")
+            if not token:
+                logger.error("BaiduVoice _get_access_token failed: %s", resp)
+                return None
+            self._access_token = token
+            # Treat the token as expired one minute early so it is never used right at its deadline.
+            self._token_expire_ts = now + resp.get("expires_in", 2592000) - 60
+            return token
 
     def voiceToText(self, voice_file):
-        # 识别本地文件
-        logger.debug("[Baidu] voice file name={}".format(voice_file))
+        """Run Baidu ASR on voice_file (decoded by get_pcm_from_wav) and return a TEXT or ERROR Reply."""
+        logger.debug("[Baidu] recognize voice file=%s", voice_file)
         pcm = get_pcm_from_wav(voice_file)
         res = self.client.asr(pcm, "pcm", 16000, {"dev_pid": self.dev_id})
-        if res["err_no"] == 0:
-            logger.info("百度语音识别到了：{}".format(res["result"]))
+        if res.get("err_no") == 0:
             text = "".join(res["result"])
-            reply = Reply(ReplyType.TEXT, text)
+            logger.info("[Baidu] ASR result: %s", text)
+            return Reply(ReplyType.TEXT, text)
         else:
-            logger.info("百度语音识别出错了: {}".format(res["err_msg"]))
-            if res["err_msg"] == "request pv too much":
-                logger.info("  出现这个原因很可能是你的百度语音服务调用量超出限制，或未开通付费")
-            reply = Reply(ReplyType.ERROR, "百度语音识别出错了；{0}".format(res["err_msg"]))
-        return reply
+            logger.error("[Baidu] ASR error: %s", res.get("err_msg", ""))
+            return Reply(ReplyType.ERROR, f"语音识别失败：{res.get('err_msg', '')}")
+
+    def _long_text_synthesis(self, text):
+        """Synthesize text via Baidu's create/query task endpoints; return a VOICE or ERROR Reply."""
+        token = self._get_access_token()
+        if not token:
+            return Reply(ReplyType.ERROR, "获取百度 access_token 失败")
+        # Create the task; "voice" follows the configured speaker like the short-text path does.
+        create_url = f"https://aip.baidubce.com/rpc/2.0/tts/v1/create?access_token={token}"
+        payload = {
+            "text": text,
+            "format": "mp3-16k",
+            "voice": self.per,
+            "lang": self.lang,
+            "speed": self.spd,
+            "pitch": self.pit,
+            "volume": self.vol,
+            "enable_subtitle": 0,
+        }
+        headers = {"Content-Type": "application/json"}
+        create_resp = requests.post(create_url, headers=headers, json=payload, timeout=10).json()
+        task_id = create_resp.get("task_id")
+        if not task_id:
+            logger.error("[Baidu] 长文本合成创建任务失败: %s", create_resp)
+            return Reply(ReplyType.ERROR, "长文本合成任务提交失败")
+        logger.info("[Baidu] 长文本合成任务已提交 task_id=%s", task_id)
+
+        # 轮询查询任务状态
+        query_url = f"https://aip.baidubce.com/rpc/2.0/tts/v1/query?access_token={token}"
+        for _ in range(30):
+            time.sleep(1.5)
+            resp = requests.post(query_url, headers=headers, json={"task_ids":[task_id]})
+            result = resp.json()
+            infos = result.get("tasks_info") or result.get("tasks") or []
+            if not infos:
+                continue
+            info = infos[0]
+            status = info.get("task_status")
+            task_res = info.get("task_result") or {}
+            audio_url = task_res.get("audio_address") or task_res.get("speech_url")
+            # A finished task that carries no download URL falls through to the failure branch.
+            if status == "Success" and audio_url:
+                break
+            if status != "Running":
+                logger.error("[Baidu] 长文本合成失败: %s", info)
+                return Reply(ReplyType.ERROR, "长文本合成执行失败")
+        else:
+            return Reply(ReplyType.ERROR, "长文本合成超时，请稍后重试")
+
+        # Fetch the audio, failing on an HTTP error instead of saving an error page as MP3.
+        audio_resp = requests.get(audio_url, timeout=30)
+        audio_resp.raise_for_status()
+        fn = TmpDir().path() + f"reply-long-{int(time.time())}-{hash(text)&0x7FFFFFFF}.mp3"
+        with open(fn, "wb") as f:
+            f.write(audio_resp.content)
+        logger.info("[Baidu] 长文本合成 success: %s", fn)
+        return Reply(ReplyType.VOICE, fn)
 
     def textToVoice(self, text):
-        result = self.client.synthesis(
-            text,
-            self.lang,
-            self.ctp,
-            {"spd": self.spd, "pit": self.pit, "vol": self.vol, "per": self.per},
-        )
-        if not isinstance(result, dict):
-            # Avoid the same filename under multithreading
-            fileName = TmpDir().path() + "reply-" + str(int(time.time())) + "-" + str(hash(text) & 0x7FFFFFFF) + ".mp3"
-            with open(fileName, "wb") as f:
-                f.write(result)
-            logger.info("[Baidu] textToVoice text={} voice file name={}".format(text, fileName))
-            reply = Reply(ReplyType.VOICE, fileName)
-        else:
-            logger.error("[Baidu] textToVoice error={}".format(result))
-            reply = Reply(ReplyType.ERROR, "抱歉，语音合成失败")
-        return reply
+        """Turn text into a VOICE Reply via the SDK or the long-text path, chosen by GBK length; ERROR Reply on failure."""
+        try:
+            # GBK 编码字节长度
+            gbk_len = len(text.encode("gbk", errors="ignore"))
+            if gbk_len <= 120:
+                # 短文本走 SDK 合成
+                result = self.client.synthesis(
+                    text, self.lang, self.ctp,
+                    {"spd":self.spd, "pit":self.pit, "vol":self.vol, "per":self.per}
+                )
+                if not isinstance(result, dict):
+                    fn = TmpDir().path() + f"reply-{int(time.time())}-{hash(text)&0x7FFFFFFF}.mp3"
+                    with open(fn, "wb") as f:
+                        f.write(result)
+                    logger.info("[Baidu] 短文本合成 success: %s", fn)
+                    return Reply(ReplyType.VOICE, fn)
+                else:
+                    logger.error("[Baidu] 短文本合成 error: %s", result)
+                    return Reply(ReplyType.ERROR, "短文本语音合成失败")
+            else:
+                return self._long_text_synthesis(text)
+        except Exception as e:
+            logger.error("BaiduVoice textToVoice exception: %s", e)
+            return Reply(ReplyType.ERROR, f"合成异常：{e}")
+

From e0dd21406dfe4d27a2d8bf3137e7d74a711fb664 Mon Sep 17 00:00:00 2001
From: vision <vision_wangpc@sina.com>
Date: Fri, 23 May 2025 15:13:28 +0800
Subject: [PATCH 2/2] Extend Baidu long-text polling, raise short-text limit

---
 voice/baidu/baidu_voice.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/voice/baidu/baidu_voice.py b/voice/baidu/baidu_voice.py
index 4fa6a56..2c4bc4b 100644
--- a/voice/baidu/baidu_voice.py
+++ b/voice/baidu/baidu_voice.py
@@ -115,8 +115,8 @@ class BaiduVoice(Voice):
 
         # 轮询查询任务状态
         query_url = f"https://aip.baidubce.com/rpc/2.0/tts/v1/query?access_token={token}"
-        for _ in range(30):
-            time.sleep(1.5)
+        for _ in range(100):
+            time.sleep(3)
             resp = requests.post(query_url, headers=headers, json={"task_ids":[task_id]})
             result = resp.json()
             infos = result.get("tasks_info") or result.get("tasks") or []
@@ -148,7 +148,7 @@ class BaiduVoice(Voice):
         try:
             # GBK 编码字节长度
             gbk_len = len(text.encode("gbk", errors="ignore"))
-            if gbk_len <= 120:
+            if gbk_len <= 1024:
                 # 短文本走 SDK 合成
                 result = self.client.synthesis(
                     text, self.lang, self.ctp,