Merge pull request #1371 from uezhenxiang2023/Peter

add ElevenLabs TTS to voice factory
2026-02-16 08:16:06 +08:00 · 2023-08-25 16:15:18 +08:00
parent a086f1989f c55d81825a
commit 3899c0cfe3
3 changed files with 41 additions and 1 deletions
--- a/config.py
+++ b/config.py
@@ -65,7 +65,7 @@ available_setting = {
    "voice_reply_voice": False,  # 是否使用语音回复语音，需要设置对应语音合成引擎的api key
    "always_reply_voice": False,  # 是否一直使用语音回复
    "voice_to_text": "openai",  # 语音识别引擎，支持openai,baidu,google,azure
-    "text_to_voice": "baidu",  # 语音合成引擎，支持baidu,google,pytts(offline),azure
+    "text_to_voice": "baidu",  # 语音合成引擎，支持baidu,google,pytts(offline),azure,elevenlabs
    # baidu 语音api配置， 使用百度语音识别和语音合成时需要
    "baidu_app_id": "",
    "baidu_api_key": "",
@@ -75,6 +75,9 @@ available_setting = {
    # azure 语音api配置， 使用azure语音识别和语音合成时需要
    "azure_voice_api_key": "",
    "azure_voice_region": "japaneast",
+    # elevenlabs 语音api配置
+    "xi_api_key": "",    #获取api key的方法可以参考https://docs.elevenlabs.io/api-reference/quick-start/authentication
+    "xi_voice_id": "",   #ElevenLabs提供了9种英式、美式等英语发音id，分别是“Adam/Antoni/Arnold/Bella/Domi/Elli/Josh/Rachel/Sam”
    # 服务时间限制，目前支持itchat
    "chat_time_module": False,  # 是否开启服务时间限制
    "chat_start_time": "00:00",  # 服务开始时间
--- a/voice/elevent/elevent_voice.py
+++ b/voice/elevent/elevent_voice.py
@@ -0,0 +1,33 @@
+import time
+
+from elevenlabs import set_api_key,generate
+
+from bridge.reply import Reply, ReplyType
+from common.log import logger
+from common.tmp_dir import TmpDir
+from voice.voice import Voice
+from config import conf
+
+XI_API_KEY = conf().get("xi_api_key")  # read once, at import time, from the "xi_api_key" config entry
+set_api_key(XI_API_KEY)  # import-time side effect: hands the key to the elevenlabs package (presumably for later generate() calls - confirm)
+name = conf().get("xi_voice_id")  # value of "xi_voice_id"; passed as voice= to generate() in textToVoice
+
+class ElevenLabsVoice(Voice):  # Voice implementation whose textToVoice() calls elevenlabs.generate()
+
+    def __init__(self):  # keeps no per-instance state; key and voice are read at module level
+        pass
+
+    def voiceToText(self, voice_file):  # not implemented: ignores voice_file and returns None
+        pass
+
+    def textToVoice(self, text):  # synthesize `text`, write it to an .mp3 file, return Reply(ReplyType.VOICE, <file path>)
+        audio = generate(  # NOTE(review): presumably returns raw audio bytes, since the result is written in "wb" mode below - confirm
+            text=text,
+            voice=name,  # module-level value taken from the "xi_voice_id" config entry
+            model='eleven_multilingual_v1'  # model id is hard-coded, not configurable
+        )
+        fileName = TmpDir().path() + "reply-" + str(int(time.time())) + "-" + str(hash(text) & 0x7FFFFFFF) + ".mp3"  # TmpDir path + whole-second timestamp + hash masked to a non-negative 31-bit value
+        with open(fileName, "wb") as f:  # binary mode: audio is written as-is
+            f.write(audio)
+        logger.info("[ElevenLabs] textToVoice text={} voice file name={}".format(text, fileName))
+        return Reply(ReplyType.VOICE, fileName)  # the reply carries the file path, not the audio data
--- a/voice/factory.py
+++ b/voice/factory.py
@@ -29,4 +29,8 @@ def create_voice(voice_type):
        from voice.azure.azure_voice import AzureVoice

        return AzureVoice()
+    elif voice_type == "elevenlabs":
+        from voice.elevent.elevent_voice import ElevenLabsVoice
+
+        return ElevenLabsVoice()
    raise RuntimeError