Compare commits

3 commits: 32a978114e ... efea9771c8

Author | SHA1 | Date
---|---|---
 | efea9771c8 |
 | 3a7bc4cf93 |
 | e33c3e41a0 |

1 changed file with 71 additions and 49 deletions

bot.py | 120
@@ -1,46 +1,67 @@
 ## Main bot
 # A lot of the handler code for speechsdk is copy/pasted from the
 # Azure documentation example for continuous recognition for compressed
 # audio, but reworked to use pydub instead.
 
 from telegram import Update
 from telegram.constants import ChatAction
 from telegram.ext import ApplicationBuilder, ContextTypes, MessageHandler, CommandHandler, PicklePersistence
-from telegram.ext.filters import VOICE
+from telegram.ext.filters import VOICE, Chat
 import logging
-from os import getenv
+from os import getenv, unlink
 from dotenv import load_dotenv
 import azure.cognitiveservices.speech as speechsdk
-from asyncio import sleep, get_event_loop
+from asyncio import sleep
 from pydub import AudioSegment
+from tempfile import NamedTemporaryFile
 
 logging.basicConfig(
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
     level=logging.INFO
 )
+async def clean_file_async(context: ContextTypes.DEFAULT_TYPE):
+    job = context.job
+    job.data['file'].close()
+    unlink(job.data['file'].name)
 async def handle_voice(update: Update, context: ContextTypes.DEFAULT_TYPE):
     logger = logging.getLogger('Recognizer')
     logger.info("Received voice message!")
     chat_id = update.effective_chat.id
     voice_id = update.message.id
-    msg = await context.bot.send_message(chat_id=chat_id, text="Working on it!",
+    status_msg = await context.bot.send_message(chat_id=chat_id, text="Working on it!",
                                          reply_to_message_id=voice_id)
 
+    def clean_file(file):
+        context.job_queue.run_once(clean_file_async, 60.0, {'file':file})
     new_file = await context.bot.get_file(update.message.voice.file_id)
-    path = await new_file.download_to_drive()
-    ogg_path = str(path)
-    wav_path = str(path) + ".wav"
-    ogg_seg = AudioSegment.from_ogg(ogg_path)
-    ogg_seg.export(wav_path, format="wav")
-    await msg.edit_text("Converted!")
+    ogg_file = NamedTemporaryFile(mode='wb', suffix='.oga', delete=False)
+    logger.debug(f"Created temporary oga at {ogg_file.name}")
+    ogg_file.close()
+    await new_file.download_to_drive(ogg_file.name)
+    ogg_seg = AudioSegment.from_ogg(ogg_file.name)
+    logger.debug(f"Imported ogg to pydub, deleting {ogg_file.name}")
+    clean_file(ogg_file)
+    wav_file = NamedTemporaryFile(mode='wb', suffix=".wav", delete=False)
+    logger.debug(f"Created temporary wav file at {wav_file.name} to transcribe")
+    ogg_seg.export(wav_file, format="wav")
+    wav_file.close()
+    await status_msg.edit_text("Converted!")
 
     global speech_config
-    audio_config = speechsdk.audio.AudioConfig(filename=wav_path)
+    audio_config = speechsdk.audio.AudioConfig(filename=wav_file.name)
     speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
 
     done = False
-    current_message = ""
-    def stop_cb(evt):
-        """callback that signals to stop continuous recognition upon receiving an event `evt`"""
-        logger.debug('CLOSING on {}'.format(evt))
-        nonlocal done
-        done = True
+    transcription = ""
     ###
     # Commented code here is intended for whenever I figure out how
     # best to run async calls (send message/edit/delete) from a
     # synchronous function call, nested inside an async function.
     # Azure speech won't call functions async even when doing 'async
     # recognition', annoyingly.
     ###
     # async def update_message_async(evt, current_message=current_message):
     # """Takes event from Azure and updates a message object to show realtime transcribing"""
     # logger.info(f"Async Message: {evt.result.text}")
     # if not current_message:
     # current_message = await context.bot.send_message(chat_id=chat_id,
@@ -48,60 +69,61 @@ async def handle_voice(update: Update, context: ContextTypes.DEFAULT_TYPE):
     # reply_to_message_id=voice_id)
     # else:
     # await current_message.edit_text(evt.result.text)
     # def update_message(evt):
     # logger.info(f"Message: {evt.result.text}")
     # loop = get_event_loop()
     # loop.create_task(update_message_async(evt, current_message))
     # loop.run_until_complete()
+    def stop_cb(evt):
+        """callback that signals to stop continuous recognition upon receiving an event `evt`"""
+        logger.debug('CLOSING on {}'.format(evt))
+        nonlocal done
+        done = True
     def finish_message(evt):
         # previous_message_id = current_message.id
         # loop = get_event_loop()
         # loop.create_task(current_message.edit_text(evt.result.text))
         # current_message = None
         # loop.run_until_complete()
-        nonlocal current_message
-        current_message += evt.result.text + " "
     # def start_session(evt):
     # loop = get_event_loop()
     # loop.create_task()
     # loop.run_until_complete()
     # def finish_session(evt):
     # loop = get_event_loop()
     # loop.create_task(msg.delete())
     # loop.run_until_complete()
     # Connect callbacks to the events fired by the speech recognizer
     #speech_recognizer.recognizing.connect(update_message)
+        nonlocal transcription
+        transcription += evt.result.text + " "
     speech_recognizer.recognized.connect(finish_message)
     #speech_recognizer.session_started.connect(start_session)
     #speech_recognizer.session_stopped.connect(finish_session)
     # speech_recognizer.canceled.connect(lambda evt: logger.info('CANCELED {}'.format(evt)))
     # stop continuous recognition on either session stopped or canceled events
     speech_recognizer.session_stopped.connect(stop_cb)
-    #speech_recognizer.canceled.connect(stop_cb)
+    speech_recognizer.canceled.connect(stop_cb)
 
     # Start continuous speech recognition
     speech_recognizer.start_continuous_recognition_async()
-    await msg.edit_text('Recognizing...')
+    if update.message.voice.duration > 120:
+        await status_msg.edit_text("Recognizing. Due to audio length, this could take a few minutes. Please be patient. You'll get notified when it's completed.")
+    else:
+        await status_msg.edit_text('Recognizing...')
     while not done:
         await context.bot.send_chat_action(chat_id=chat_id,action=ChatAction.TYPING)
-        await sleep(2)
+        await sleep(5)
     speech_recognizer.stop_continuous_recognition_async()
-    await msg.edit_text(current_message)
 
+    await status_msg.delete()
+    await context.bot.send_message(chat_id=chat_id, text=transcription,
+                                   reply_to_message_id=voice_id)
+    logger.debug(f"Cleaning up, deleting {wav_file.name}")
+    clean_file(wav_file)
 
 async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
     await context.bot.send_message(chat_id=update.effective_chat.id, text="Forward me a voice message and I'll transcribe it!")
+async def help(update: Update, context: ContextTypes.DEFAULT_TYPE):
+    await context.bot.send_message(chat_id=update.effective_chat.id, text="Forward me a voice message. It may take a few minutes to transcribe. I'll reply to the voice message with a new one containing the transcription, so you can go do something else while you're waiting.")
 
 if __name__ == '__main__':
+    lg = logging.getLogger('main')
     load_dotenv()
     global speech_config
     speech_config = speechsdk.SpeechConfig(subscription=getenv('SPEECH_KEY'), region=getenv('SPEECH_REGION'))
     pers = PicklePersistence(filepath='bot.pickle')
     bot_token = getenv('TELEGRAM_BOT_TOKEN')
-    application = ApplicationBuilder().token(bot_token).persistence(persistence=pers).build()
+    application = ApplicationBuilder().token(bot_token)
+    application = application.persistence(persistence=pers)
+    application = application.build()
     start_handler = CommandHandler('start', start)
-    voice_handler = MessageHandler(VOICE, handle_voice)
+    help_handler = CommandHandler('help', help)
+    voice_handler = MessageHandler(VOICE, handle_voice, block=True)
+    if getenv('TELEGRAM_BOT_ALLOWED_CHAT_IDS'):
+        chat_ids = [int(x) for x in getenv('TELEGRAM_BOT_ALLOWED_CHAT_IDS').split(',')]
+        lg.info(f"Restricting to these chats: {chat_ids}")
+        start_handler.filters = Chat(chat_id=chat_ids)
+        help_handler.filters = Chat(chat_id=chat_ids)
+        voice_handler.filters = VOICE & Chat(chat_id=chat_ids)
     application.add_handler(start_handler)
+    application.add_handler(help_handler)
     application.add_handler(voice_handler)
 
     application.run_polling()
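
Note on the commented-out experiment in this diff: the Speech SDK fires its recognizing/recognized callbacks on its own worker thread, so they cannot await Telegram coroutines directly. A minimal sketch of one way to bridge that gap, not part of these commits and reusing the bot's status_msg and speech_recognizer names purely for illustration, is to capture the running event loop inside handle_voice and hand coroutines back to it with asyncio.run_coroutine_threadsafe:

    from asyncio import get_running_loop, run_coroutine_threadsafe

    # inside handle_voice, before wiring up the recognizer callbacks
    loop = get_running_loop()  # the loop python-telegram-bot is driving

    def update_message(evt):
        # Called from the Speech SDK's worker thread, so we cannot await here.
        # Schedule the edit on the bot's event loop instead (fire-and-forget).
        run_coroutine_threadsafe(status_msg.edit_text(evt.result.text), loop)

    speech_recognizer.recognizing.connect(update_message)

run_coroutine_threadsafe returns a concurrent.futures.Future, so the callback could also block on or inspect the result; Telegram's edit rate limits would still need handling for frequent partial results.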