mirror of
https://github.com/zhayujie/chatgpt-on-wechat.git
synced 2026-01-19 01:21:01 +08:00
89 lines
3.3 KiB
Python
89 lines
3.3 KiB
Python
"""
|
||
azure voice service
|
||
"""
|
||
import json
|
||
import os
|
||
import time
|
||
|
||
import azure.cognitiveservices.speech as speechsdk
|
||
|
||
from bridge.reply import Reply, ReplyType
|
||
from common.log import logger
|
||
from common.tmp_dir import TmpDir
|
||
from config import conf
|
||
from voice.voice import Voice
|
||
|
||
"""
|
||
Azure voice
|
||
主目录设置文件中需填写azure_voice_api_key和azure_voice_region
|
||
|
||
查看可用的 voice: https://speech.microsoft.com/portal/voicegallery
|
||
|
||
"""
|
||
|
||
|
||
class AzureVoice(Voice):
    """Speech-to-text and text-to-speech backed by the Azure Speech SDK.

    The subscription key and region are read from the main project
    configuration (``azure_voice_api_key`` and ``azure_voice_region``).
    The synthesis voice name and the recognition language are read from a
    ``config.json`` file located next to this module; the file is created
    with default values the first time it is missing.

    Available voices: https://speech.microsoft.com/portal/voicegallery
    """

    def __init__(self):
        """Load the voice settings and build the shared ``SpeechConfig``.

        Initialisation is best-effort: any failure is logged and swallowed so
        that constructing the object never raises.  When that happens
        ``self.speech_config`` may stay ``None`` and the conversion methods
        answer with an error reply instead of crashing.
        """
        # Define the attributes up front so that a failure inside the ``try``
        # block leaves the instance in a well-defined state instead of
        # causing AttributeError on the first conversion call.
        self.api_key = None
        self.api_region = None
        self.speech_config = None
        try:
            config_path = os.path.join(os.path.dirname(__file__), "config.json")
            config = self._load_voice_config(config_path)
            self.api_key = conf().get("azure_voice_api_key")
            self.api_region = conf().get("azure_voice_region")
            self.speech_config = speechsdk.SpeechConfig(
                subscription=self.api_key, region=self.api_region
            )
            self.speech_config.speech_synthesis_voice_name = config[
                "speech_synthesis_voice_name"
            ]
            self.speech_config.speech_recognition_language = config[
                "speech_recognition_language"
            ]
        except Exception as e:
            # NOTE(review): assumes ``logger`` is a standard ``logging.Logger``
            # (``warn`` is the deprecated alias of ``warning``) - confirm.
            logger.warning("AzureVoice init failed: %s, ignore ", e)

    @staticmethod
    def _load_voice_config(config_path):
        """Return the settings stored at ``config_path``, creating the file with defaults if it is missing."""
        if os.path.exists(config_path):
            with open(config_path, "r", encoding="utf-8") as fr:
                return json.load(fr)
        # No config file yet: create a local one holding the defaults.
        config = {
            "speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural",
            "speech_recognition_language": "zh-CN",
        }
        with open(config_path, "w", encoding="utf-8") as fw:
            json.dump(config, fw, indent=4)
        return config

    def voiceToText(self, voice_file):
        """Recognise the speech contained in an audio file.

        Args:
            voice_file: Path of the audio file handed to the Azure recogniser.

        Returns:
            A ``Reply`` of type ``TEXT`` carrying the recognised text, or a
            ``Reply`` of type ``ERROR`` when recognition fails or the service
            could not be initialised.
        """
        if self.speech_config is None:
            # __init__ failed; report it the same way as a recognition error.
            logger.error("[Azure] voiceToText error, speech config is not initialized")
            return Reply(ReplyType.ERROR, "抱歉,语音识别失败")

        audio_config = speechsdk.AudioConfig(filename=voice_file)
        speech_recognizer = speechsdk.SpeechRecognizer(
            speech_config=self.speech_config, audio_config=audio_config
        )
        result = speech_recognizer.recognize_once()
        if result.reason == speechsdk.ResultReason.RecognizedSpeech:
            logger.info(
                "[Azure] voiceToText voice file name={} text={}".format(
                    voice_file, result.text
                )
            )
            return Reply(ReplyType.TEXT, result.text)

        logger.error("[Azure] voiceToText error, result={}".format(result))
        return Reply(ReplyType.ERROR, "抱歉,语音识别失败")

    def textToVoice(self, text):
        """Synthesise ``text`` into a WAV file.

        Args:
            text: The text to speak.

        Returns:
            A ``Reply`` of type ``VOICE`` carrying the path of the generated
            file, or a ``Reply`` of type ``ERROR`` when synthesis fails or the
            service could not be initialised.
        """
        import uuid  # local import: only needed to build a unique file name

        if self.speech_config is None:
            # __init__ failed; report it the same way as a synthesis error.
            logger.error("[Azure] textToVoice error, speech config is not initialized")
            return Reply(ReplyType.ERROR, "抱歉,语音合成失败")

        # A timestamp with one-second resolution is not unique: two replies
        # synthesised within the same second would overwrite each other, so a
        # random suffix is appended.  The directory is concatenated directly,
        # as before (presumably TmpDir().path() ends with a separator - confirm).
        fileName = "{}reply-{}-{}.wav".format(
            TmpDir().path(), int(time.time()), uuid.uuid4().hex
        )
        audio_config = speechsdk.AudioConfig(filename=fileName)
        speech_synthesizer = speechsdk.SpeechSynthesizer(
            speech_config=self.speech_config, audio_config=audio_config
        )
        result = speech_synthesizer.speak_text(text)
        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            logger.info(
                "[Azure] textToVoice text={} voice file name={}".format(text, fileName)
            )
            return Reply(ReplyType.VOICE, fileName)

        logger.error("[Azure] textToVoice error, result={}".format(result))
        return Reply(ReplyType.ERROR, "抱歉,语音合成失败")
|