Files
chatgpt-on-wechat/voice/baidu/baidu_voice.py
2023-04-17 01:01:02 +08:00

96 lines
3.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
baidu voice service
"""
import json
import os
import time
from aip import AipSpeech
from bridge.reply import Reply, ReplyType
from common.log import logger
from common.tmp_dir import TmpDir
from config import conf
from voice.audio_convert import get_pcm_from_wav
from voice.voice import Voice
"""
百度的语音识别API.
dev_pid:
- 1936: 普通话远场
- 1536普通话(支持简单的英文识别)
- 1537普通话(纯中文识别)
- 1737英语
- 1637粤语
- 1837四川话
要使用本模块, 首先到 yuyin.baidu.com 注册一个开发者账号,
之后创建一个新应用, 然后在应用管理的"查看key"中获得 API Key 和 Secret Key
然后在 config.json 中填入这两个值, 以及 app_id, dev_pid
"""
class BaiduVoice(Voice):
def __init__(self):
try:
curdir = os.path.dirname(__file__)
config_path = os.path.join(curdir, "config.json")
bconf = None
if not os.path.exists(config_path): # 如果没有配置文件,创建本地配置文件
bconf = {"lang": "zh", "ctp": 1, "spd": 5, "pit": 5, "vol": 5, "per": 0}
with open(config_path, "w") as fw:
json.dump(bconf, fw, indent=4)
else:
with open(config_path, "r") as fr:
bconf = json.load(fr)
self.app_id = conf().get("baidu_app_id")
self.api_key = conf().get("baidu_api_key")
self.secret_key = conf().get("baidu_secret_key")
self.dev_id = conf().get("baidu_dev_pid")
self.lang = bconf["lang"]
self.ctp = bconf["ctp"]
self.spd = bconf["spd"]
self.pit = bconf["pit"]
self.vol = bconf["vol"]
self.per = bconf["per"]
self.client = AipSpeech(self.app_id, self.api_key, self.secret_key)
except Exception as e:
logger.warn("BaiduVoice init failed: %s, ignore " % e)
def voiceToText(self, voice_file):
# 识别本地文件
logger.debug("[Baidu] voice file name={}".format(voice_file))
pcm = get_pcm_from_wav(voice_file)
res = self.client.asr(pcm, "pcm", 16000, {"dev_pid": self.dev_id})
if res["err_no"] == 0:
logger.info("百度语音识别到了:{}".format(res["result"]))
text = "".join(res["result"])
reply = Reply(ReplyType.TEXT, text)
else:
logger.info("百度语音识别出错了: {}".format(res["err_msg"]))
if res["err_msg"] == "request pv too much":
logger.info(" 出现这个原因很可能是你的百度语音服务调用量超出限制,或未开通付费")
reply = Reply(ReplyType.ERROR, "百度语音识别出错了;{0}".format(res["err_msg"]))
return reply
def textToVoice(self, text):
result = self.client.synthesis(
text,
self.lang,
self.ctp,
{"spd": self.spd, "pit": self.pit, "vol": self.vol, "per": self.per},
)
if not isinstance(result, dict):
fileName = TmpDir().path() + "reply-" + str(int(time.time())) + ".mp3"
with open(fileName, "wb") as f:
f.write(result)
logger.info(
"[Baidu] textToVoice text={} voice file name={}".format(text, fileName)
)
reply = Reply(ReplyType.VOICE, fileName)
else:
logger.error("[Baidu] textToVoice error={}".format(result))
reply = Reply(ReplyType.ERROR, "抱歉,语音合成失败")
return reply