mirror of
https://github.com/zhayujie/chatgpt-on-wechat.git
synced 2026-01-19 01:21:01 +08:00
增加了使用阿里云进行语音识别的引擎
This commit is contained in:
@@ -95,7 +95,7 @@ available_setting = {
|
||||
"group_speech_recognition": False, # 是否开启群组语音识别
|
||||
"voice_reply_voice": False, # 是否使用语音回复语音,需要设置对应语音合成引擎的api key
|
||||
"always_reply_voice": False, # 是否一直使用语音回复
|
||||
"voice_to_text": "openai", # 语音识别引擎,支持openai,baidu,google,azure
|
||||
"voice_to_text": "openai", # 语音识别引擎,支持openai,baidu,google,ali,azure
|
||||
"text_to_voice": "openai", # 语音合成引擎,支持openai,baidu,google,pytts(offline),ali,azure,elevenlabs,edge(online)
|
||||
"text_to_voice_model": "tts-1",
|
||||
"tts_voice_id": "alloy",
|
||||
|
||||
@@ -8,6 +8,7 @@ Description:
|
||||
|
||||
"""
|
||||
|
||||
import http.client
|
||||
import json
|
||||
import time
|
||||
import requests
|
||||
@@ -61,6 +62,69 @@ def text_to_speech_aliyun(url, text, appkey, token):
|
||||
|
||||
return output_file
|
||||
|
||||
def speech_to_text_aliyun(url, audioContent, appkey, token):
    """
    Recognize the speech contained in PCM audio data with Aliyun's speech
    recognition service.

    Parameters:
    - url (str): endpoint URL of the Aliyun speech recognition service.
    - audioContent (bytes): PCM audio data, sent as the request body.
    - appkey (str): your Aliyun appkey.
    - token (str): authentication token for the Aliyun API.

    Returns:
    - str: the recognized text on success, otherwise None (non-JSON or
      malformed response, non-success status code, or empty result).

    Network-level errors raised by http.client still propagate to the caller;
    the connection is closed in every case.
    """
    # Imported locally so this function does not depend on a file-level import.
    from urllib.parse import urlencode

    audio_format = 'pcm'
    sample_rate = 16000
    enable_punctuation_prediction = True
    enable_inverse_text_normalization = True
    enable_voice_detection = False

    # Build the RESTful request parameters; urlencode escapes the values so an
    # unusual appkey cannot corrupt the query string.
    query = [
        ('appkey', appkey),
        ('format', audio_format),
        ('sample_rate', sample_rate),
    ]
    if enable_punctuation_prediction:
        query.append(('enable_punctuation_prediction', 'true'))
    if enable_inverse_text_normalization:
        query.append(('enable_inverse_text_normalization', 'true'))
    if enable_voice_detection:
        query.append(('enable_voice_detection', 'true'))
    request_url = url + '?' + urlencode(query)

    # NOTE(review): the connection host is fixed and does not follow `url`;
    # confirm whether endpoints in other regions need to be supported.
    host = 'nls-gateway-cn-shanghai.aliyuncs.com'

    # HTTPS request headers
    http_headers = {
        'X-NLS-Token': token,
        'Content-type': 'application/octet-stream',
        'Content-Length': len(audioContent)
    }

    conn = http.client.HTTPSConnection(host)
    try:
        conn.request(method='POST', url=request_url, body=audioContent, headers=http_headers)
        raw_body = conn.getresponse().read()
    finally:
        # Always release the socket, including when the request itself fails.
        conn.close()

    try:
        payload = json.loads(raw_body)
    except ValueError:
        logger.error(f"语音识别失败,收到非JSON格式的数据: {raw_body}")
        return None

    if not isinstance(payload, dict):
        # Valid JSON but not an object: there is no status/result to read.
        logger.error(f"语音识别失败,响应格式异常: {raw_body}")
        return None

    # .get() keeps a response without these keys on the "return None" path
    # instead of raising KeyError.
    status = payload.get('status')
    if status != 20000000:
        logger.error(f"语音识别失败,状态码: {status}")
        return None

    result = payload.get('result')
    if not result:
        return None

    logger.info(f"阿里云语音识别到了:{result}")
    return result
|
||||
|
||||
|
||||
class AliyunTokenGenerator:
|
||||
"""
|
||||
|
||||
@@ -15,9 +15,9 @@ import time
|
||||
|
||||
from bridge.reply import Reply, ReplyType
|
||||
from common.log import logger
|
||||
from voice.audio_convert import get_pcm_from_wav
|
||||
from voice.voice import Voice
|
||||
from voice.ali.ali_api import AliyunTokenGenerator
|
||||
from voice.ali.ali_api import text_to_speech_aliyun
|
||||
from voice.ali.ali_api import AliyunTokenGenerator, speech_to_text_aliyun, text_to_speech_aliyun
|
||||
from config import conf
|
||||
|
||||
|
||||
@@ -34,7 +34,8 @@ class AliVoice(Voice):
|
||||
self.token = None
|
||||
self.token_expire_time = 0
|
||||
# 默认复用阿里云千问的 access_key 和 access_secret
|
||||
self.api_url = config.get("api_url")
|
||||
self.api_url_voice_to_text = config.get("api_url_voice_to_text")
|
||||
self.api_url_text_to_voice = config.get("api_url_text_to_voice")
|
||||
self.app_key = config.get("app_key")
|
||||
self.access_key_id = conf().get("qwen_access_key_id") or config.get("access_key_id")
|
||||
self.access_key_secret = conf().get("qwen_access_key_secret") or config.get("access_key_secret")
|
||||
@@ -53,7 +54,7 @@ class AliVoice(Voice):
|
||||
r'äöüÄÖÜáéíóúÁÉÍÓÚàèìòùÀÈÌÒÙâêîôûÂÊÎÔÛçÇñÑ,。!?,.]', '', text)
|
||||
# 提取有效的token
|
||||
token_id = self.get_valid_token()
|
||||
fileName = text_to_speech_aliyun(self.api_url, text, self.app_key, token_id)
|
||||
fileName = text_to_speech_aliyun(self.api_url_text_to_voice, text, self.app_key, token_id)
|
||||
if fileName:
|
||||
logger.info("[Ali] textToVoice text={} voice file name={}".format(text, fileName))
|
||||
reply = Reply(ReplyType.VOICE, fileName)
|
||||
@@ -61,6 +62,25 @@ class AliVoice(Voice):
|
||||
reply = Reply(ReplyType.ERROR, "抱歉,语音合成失败")
|
||||
return reply
|
||||
|
||||
def voiceToText(self, voice_file):
    """
    Convert a voice file to text.

    :param voice_file: the voice file to convert; it is handed to
        get_pcm_from_wav to obtain the audio data sent for recognition.
    :return: a Reply object holding the recognized text (ReplyType.TEXT),
        or an error message (ReplyType.ERROR) when no text was recognized.
    """
    # Obtain a valid token first, then log, then load the audio.
    token_id = self.get_valid_token()
    logger.debug("[Ali] voice file name={}".format(voice_file))

    pcm_data = get_pcm_from_wav(voice_file)
    recognized = speech_to_text_aliyun(
        self.api_url_voice_to_text,
        pcm_data,
        self.app_key,
        token_id,
    )

    # Guard clause: any falsy result (None or empty text) is a failure.
    if not recognized:
        return Reply(ReplyType.ERROR, "抱歉,语音识别失败")

    logger.info("[Ali] VoicetoText = {}".format(recognized))
    return Reply(ReplyType.TEXT, recognized)
|
||||
|
||||
def get_valid_token(self):
|
||||
"""
|
||||
获取有效的阿里云token。
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
{
|
||||
"api_url": "https://nls-gateway-cn-shanghai.aliyuncs.com/stream/v1/tts",
|
||||
"api_url_text_to_voice": "https://nls-gateway-cn-shanghai.aliyuncs.com/stream/v1/tts",
|
||||
"api_url_voice_to_text": "https://nls-gateway.cn-shanghai.aliyuncs.com/stream/v1/asr",
|
||||
"app_key": "",
|
||||
"access_key_id": "",
|
||||
"access_key_secret": ""
|
||||
|
||||
Reference in New Issue
Block a user