mirror of
https://github.com/zhayujie/chatgpt-on-wechat.git
synced 2026-03-03 17:05:04 +08:00
解决语音的识别和转换兼容性
This commit is contained in:
@@ -5,6 +5,9 @@ wechat channel
|
||||
"""
|
||||
|
||||
import os
|
||||
import requests
|
||||
import io
|
||||
import time
|
||||
from lib import itchat
|
||||
import json
|
||||
from lib.itchat.content import *
|
||||
@@ -17,9 +20,7 @@ from common.tmp_dir import TmpDir
|
||||
from config import conf
|
||||
from common.time_check import time_checker
|
||||
from plugins import *
|
||||
import requests
|
||||
import io
|
||||
import time
|
||||
from voice.audio_convert import mp3_to_wav
|
||||
|
||||
|
||||
thread_pool = ThreadPoolExecutor(max_workers=8)
|
||||
@@ -28,8 +29,7 @@ thread_pool = ThreadPoolExecutor(max_workers=8)
|
||||
def thread_pool_callback(worker):
|
||||
worker_exception = worker.exception()
|
||||
if worker_exception:
|
||||
logger.exception(
|
||||
"Worker return exception: {}".format(worker_exception))
|
||||
logger.exception("Worker return exception: {}".format(worker_exception))
|
||||
|
||||
|
||||
@itchat.msg_register(TEXT)
|
||||
@@ -247,9 +247,16 @@ class WechatChannel(Channel):
|
||||
reply = super().build_reply_content(context.content, context)
|
||||
elif context.type == ContextType.VOICE:
|
||||
msg = context['msg']
|
||||
file_name = TmpDir().path() + context.content
|
||||
msg.download(file_name)
|
||||
reply = super().build_voice_to_text(file_name)
|
||||
mp3_path = TmpDir().path() + context.content
|
||||
msg.download(mp3_path)
|
||||
# mp3转wav
|
||||
wav_path = os.path.splitext(mp3_path)[0] + '.wav'
|
||||
mp3_to_wav(mp3_path=mp3_path, wav_path=wav_path)
|
||||
# 语音识别
|
||||
reply = super().build_voice_to_text(wav_path)
|
||||
# 删除临时文件
|
||||
os.remove(wav_path)
|
||||
os.remove(mp3_path)
|
||||
if reply.type != ReplyType.ERROR and reply.type != ReplyType.INFO:
|
||||
context.content = reply.content # 语音转文字后,将文字内容作为新的context
|
||||
context.type = ContextType.TEXT
|
||||
@@ -263,12 +270,13 @@ class WechatChannel(Channel):
|
||||
prefixes = conf().get('group_chat_prefix')
|
||||
for prefix in prefixes:
|
||||
if context.content.startswith(prefix):
|
||||
context.content = context.content.replace(prefix, '', 1).strip()
|
||||
context.content = context.content.replace(
|
||||
prefix, '', 1).strip()
|
||||
break
|
||||
else:
|
||||
logger.info("[WX]receive voice check prefix: " + 'False')
|
||||
return
|
||||
|
||||
|
||||
reply = super().build_reply_content(context.content, context)
|
||||
if reply.type == ReplyType.TEXT:
|
||||
if conf().get('voice_reply_voice'):
|
||||
|
||||
@@ -4,25 +4,19 @@
|
||||
wechaty channel
|
||||
Python Wechaty - https://github.com/wechaty/python-wechaty
|
||||
"""
|
||||
import io
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
import asyncio
|
||||
import requests
|
||||
import pysilk
|
||||
import wave
|
||||
from pydub import AudioSegment
|
||||
from typing import Optional, Union
|
||||
from bridge.context import Context, ContextType
|
||||
from wechaty_puppet import MessageType, FileBox, ScanStatus # type: ignore
|
||||
from wechaty import Wechaty, Contact
|
||||
from wechaty.user import Message, Room, MiniProgram, UrlLink
|
||||
from wechaty.user import Message, MiniProgram, UrlLink
|
||||
from channel.channel import Channel
|
||||
from common.log import logger
|
||||
from common.tmp_dir import TmpDir
|
||||
from config import conf
|
||||
|
||||
from voice.audio_convert import sil_to_wav, mp3_to_sil
|
||||
|
||||
class WechatyChannel(Channel):
|
||||
|
||||
@@ -50,8 +44,8 @@ class WechatyChannel(Channel):
|
||||
|
||||
async def on_scan(self, status: ScanStatus, qr_code: Optional[str] = None,
|
||||
data: Optional[str] = None):
|
||||
contact = self.Contact.load(self.contact_id)
|
||||
logger.info('[WX] scan user={}, scan status={}, scan qr_code={}'.format(contact, status.name, qr_code))
|
||||
# contact = self.Contact.load(self.contact_id)
|
||||
# logger.info('[WX] scan user={}, scan status={}, scan qr_code={}'.format(contact, status.name, qr_code))
|
||||
# print(f'user <{contact}> scan status: {status.name} , 'f'qr_code: {qr_code}')
|
||||
|
||||
async def on_message(self, msg: Message):
|
||||
@@ -67,7 +61,7 @@ class WechatyChannel(Channel):
|
||||
content = msg.text()
|
||||
mention_content = await msg.mention_text() # 返回过滤掉@name后的消息
|
||||
match_prefix = self.check_prefix(content, conf().get('single_chat_prefix'))
|
||||
conversation: Union[Room, Contact] = from_contact if room is None else room
|
||||
# conversation: Union[Room, Contact] = from_contact if room is None else room
|
||||
|
||||
if room is None and msg.type() == MessageType.MESSAGE_TYPE_TEXT:
|
||||
if not msg.is_self() and match_prefix is not None:
|
||||
@@ -102,21 +96,8 @@ class WechatyChannel(Channel):
|
||||
await voice_file.to_file(silk_file)
|
||||
logger.info("[WX]receive voice file: " + silk_file)
|
||||
# 将文件转成wav格式音频
|
||||
wav_file = silk_file.replace(".slk", ".wav")
|
||||
with open(silk_file, 'rb') as f:
|
||||
silk_data = f.read()
|
||||
pcm_data = pysilk.decode(silk_data)
|
||||
|
||||
with wave.open(wav_file, 'wb') as wav_data:
|
||||
wav_data.setnchannels(1)
|
||||
wav_data.setsampwidth(2)
|
||||
wav_data.setframerate(24000)
|
||||
wav_data.writeframes(pcm_data)
|
||||
if os.path.exists(wav_file):
|
||||
converter_state = "true" # 转换wav成功
|
||||
else:
|
||||
converter_state = "false" # 转换wav失败
|
||||
logger.info("[WX]receive voice converter: " + converter_state)
|
||||
wav_file = os.path.splitext(silk_file)[0] + '.wav'
|
||||
sil_to_wav(silk_file, wav_file)
|
||||
# 语音识别为文本
|
||||
query = super().build_voice_to_text(wav_file).content
|
||||
# 交验关键字
|
||||
@@ -183,21 +164,8 @@ class WechatyChannel(Channel):
|
||||
await voice_file.to_file(silk_file)
|
||||
logger.info("[WX]receive voice file: " + silk_file)
|
||||
# 将文件转成wav格式音频
|
||||
wav_file = silk_file.replace(".slk", ".wav")
|
||||
with open(silk_file, 'rb') as f:
|
||||
silk_data = f.read()
|
||||
pcm_data = pysilk.decode(silk_data)
|
||||
|
||||
with wave.open(wav_file, 'wb') as wav_data:
|
||||
wav_data.setnchannels(1)
|
||||
wav_data.setsampwidth(2)
|
||||
wav_data.setframerate(24000)
|
||||
wav_data.writeframes(pcm_data)
|
||||
if os.path.exists(wav_file):
|
||||
converter_state = "true" # 转换wav成功
|
||||
else:
|
||||
converter_state = "false" # 转换wav失败
|
||||
logger.info("[WX]receive voice converter: " + converter_state)
|
||||
wav_file = os.path.splitext(silk_file)[0] + '.wav'
|
||||
sil_to_wav(silk_file, wav_file)
|
||||
# 语音识别为文本
|
||||
query = super().build_voice_to_text(wav_file).content
|
||||
# 校验关键字
|
||||
@@ -260,21 +228,12 @@ class WechatyChannel(Channel):
|
||||
if reply_text:
|
||||
# 转换 mp3 文件为 silk 格式
|
||||
mp3_file = super().build_text_to_voice(reply_text).content
|
||||
silk_file = mp3_file.replace(".mp3", ".silk")
|
||||
# Load the MP3 file
|
||||
audio = AudioSegment.from_file(mp3_file, format="mp3")
|
||||
# Convert to WAV format
|
||||
audio = audio.set_frame_rate(24000).set_channels(1)
|
||||
wav_data = audio.raw_data
|
||||
sample_width = audio.sample_width
|
||||
# Encode to SILK format
|
||||
silk_data = pysilk.encode(wav_data, 24000)
|
||||
# Save the silk file
|
||||
with open(silk_file, "wb") as f:
|
||||
f.write(silk_data)
|
||||
silk_file = os.path.splitext(mp3_file)[0] + '.sil'
|
||||
voiceLength = mp3_to_sil(mp3_file, silk_file)
|
||||
# 发送语音
|
||||
t = int(time.time())
|
||||
file_box = FileBox.from_file(silk_file, name=str(t) + '.silk')
|
||||
file_box = FileBox.from_file(silk_file, name=str(t) + '.sil')
|
||||
file_box.metadata = {'voiceLength': voiceLength}
|
||||
await self.send(file_box, reply_user_id)
|
||||
# 清除缓存文件
|
||||
os.remove(mp3_file)
|
||||
@@ -337,21 +296,12 @@ class WechatyChannel(Channel):
|
||||
reply_text = '@' + group_user_name + ' ' + reply_text.strip()
|
||||
# 转换 mp3 文件为 silk 格式
|
||||
mp3_file = super().build_text_to_voice(reply_text).content
|
||||
silk_file = mp3_file.replace(".mp3", ".silk")
|
||||
# Load the MP3 file
|
||||
audio = AudioSegment.from_file(mp3_file, format="mp3")
|
||||
# Convert to WAV format
|
||||
audio = audio.set_frame_rate(24000).set_channels(1)
|
||||
wav_data = audio.raw_data
|
||||
sample_width = audio.sample_width
|
||||
# Encode to SILK format
|
||||
silk_data = pysilk.encode(wav_data, 24000)
|
||||
# Save the silk file
|
||||
with open(silk_file, "wb") as f:
|
||||
f.write(silk_data)
|
||||
silk_file = os.path.splitext(mp3_file)[0] + '.sil'
|
||||
voiceLength = mp3_to_sil(mp3_file, silk_file)
|
||||
# 发送语音
|
||||
t = int(time.time())
|
||||
file_box = FileBox.from_file(silk_file, name=str(t) + '.silk')
|
||||
file_box.metadata = {'voiceLength': voiceLength}
|
||||
await self.send_group(file_box, group_id)
|
||||
# 清除缓存文件
|
||||
os.remove(mp3_file)
|
||||
|
||||
@@ -3,17 +3,14 @@
|
||||
google voice service
|
||||
"""
|
||||
|
||||
import pathlib
|
||||
import subprocess
|
||||
import time
|
||||
from bridge.reply import Reply, ReplyType
|
||||
import speech_recognition
|
||||
import pyttsx3
|
||||
from gtts import gTTS
|
||||
from bridge.reply import Reply, ReplyType
|
||||
from common.log import logger
|
||||
from common.tmp_dir import TmpDir
|
||||
from voice.voice import Voice
|
||||
from voice.audio_convert import mp3_to_wav
|
||||
|
||||
|
||||
class GoogleVoice(Voice):
|
||||
@@ -30,11 +27,10 @@ class GoogleVoice(Voice):
|
||||
self.engine.setProperty('voice', voices[1].id)
|
||||
|
||||
def voiceToText(self, voice_file):
|
||||
new_file = voice_file.replace('.mp3', '.wav')
|
||||
# new_file = voice_file.replace('.mp3', '.wav')
|
||||
# subprocess.call('ffmpeg -i ' + voice_file +
|
||||
# ' -acodec pcm_s16le -ac 1 -ar 16000 ' + new_file, shell=True)
|
||||
mp3_to_wav(voice_file, new_file)
|
||||
with speech_recognition.AudioFile(new_file) as source:
|
||||
with speech_recognition.AudioFile(voice_file) as source:
|
||||
audio = self.recognizer.record(source)
|
||||
try:
|
||||
text = self.recognizer.recognize_google(audio, language='zh-CN')
|
||||
|
||||
Reference in New Issue
Block a user