Compare commits

3 commits: 32a978114e ... efea9771c8

Author | SHA1 | Date
---|---|---
 | efea9771c8 |
 | 3a7bc4cf93 |
 | e33c3e41a0 |

1 changed file with 71 additions and 49 deletions

bot.py | 120
@@ -1,46 +1,67 @@
 ## Main bot
 # A lot of the handler code for speechsdk is copy/pasted from the
 # Azure documentation example for continuous recognition for compressed
 # audio, but reworked to use pydub instead.
 
 from telegram import Update
 from telegram.constants import ChatAction
 from telegram.ext import ApplicationBuilder, ContextTypes, MessageHandler, CommandHandler, PicklePersistence
-from telegram.ext.filters import VOICE
+from telegram.ext.filters import VOICE, Chat
 import logging
-from os import getenv
+from os import getenv, unlink
 from dotenv import load_dotenv
 import azure.cognitiveservices.speech as speechsdk
-from asyncio import sleep, get_event_loop
+from asyncio import sleep
 from pydub import AudioSegment
+from tempfile import NamedTemporaryFile
 
 logging.basicConfig(
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
     level=logging.INFO
 )
+async def clean_file_async(context: ContextTypes.DEFAULT_TYPE):
+    job = context.job
+    job.data['file'].close()
+    unlink(job.data['file'].name)
 async def handle_voice(update: Update, context: ContextTypes.DEFAULT_TYPE):
     logger = logging.getLogger('Recognizer')
     logger.info("Received voice message!")
     chat_id = update.effective_chat.id
     voice_id = update.message.id
-    msg = await context.bot.send_message(chat_id=chat_id, text="Working on it!",
+    status_msg = await context.bot.send_message(chat_id=chat_id, text="Working on it!",
                                          reply_to_message_id=voice_id)
 
+    def clean_file(file):
+        context.job_queue.run_once(clean_file_async, 60.0, {'file':file})
     new_file = await context.bot.get_file(update.message.voice.file_id)
-    path = await new_file.download_to_drive()
-    ogg_path = str(path)
-    wav_path = str(path) + ".wav"
-    ogg_seg = AudioSegment.from_ogg(ogg_path)
-    ogg_seg.export(wav_path, format="wav")
-    await msg.edit_text("Converted!")
+    ogg_file = NamedTemporaryFile(mode='wb', suffix='.oga', delete=False)
+    logger.debug(f"Created temporary oga at {ogg_file.name}")
+    ogg_file.close()
+    await new_file.download_to_drive(ogg_file.name)
+    ogg_seg = AudioSegment.from_ogg(ogg_file.name)
+    logger.debug(f"Imported ogg to pydub, deleting {ogg_file.name}")
+    clean_file(ogg_file)
+    wav_file = NamedTemporaryFile(mode='wb', suffix=".wav", delete=False)
+    logger.debug(f"Created temporary wav file at {wav_file.name} to transcribe")
+    ogg_seg.export(wav_file, format="wav")
+    wav_file.close()
+    await status_msg.edit_text("Converted!")
 
     global speech_config
-    audio_config = speechsdk.audio.AudioConfig(filename=wav_path)
+    audio_config = speechsdk.audio.AudioConfig(filename=wav_file.name)
     speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
 
     done = False
-    current_message = ""
-    def stop_cb(evt):
-        """callback that signals to stop continuous recognition upon receiving an event `evt`"""
-        logger.debug('CLOSING on {}'.format(evt))
-        nonlocal done
-        done = True
+    transcription = ""
     ###
     # Commented code here is intended for whenever I figure out how
     # best to run async calls (send message/edit/delete) from a
     # synchronous function call, nested inside an async function.
     # Azure speech won't call functions async even when doing 'async
     # recognition', annoyingly.
     ###
     # async def update_message_async(evt, current_message=current_message):
     # """Takes event from Azure and updates a message object to show realtime transcribing"""
     # logger.info(f"Async Message: {evt.result.text}")
     # if not current_message:
     # current_message = await context.bot.send_message(chat_id=chat_id,
@@ -48,60 +69,61 @@ async def handle_voice(update: Update, context: ContextTypes.DEFAULT_TYPE):
     # reply_to_message_id=voice_id)
     # else:
     # await current_message.edit_text(evt.result.text)
     # def update_message(evt):
     # logger.info(f"Message: {evt.result.text}")
     # loop = get_event_loop()
     # loop.create_task(update_message_async(evt, current_message))
     # loop.run_until_complete()
+    def stop_cb(evt):
+        """callback that signals to stop continuous recognition upon receiving an event `evt`"""
+        logger.debug('CLOSING on {}'.format(evt))
+        nonlocal done
+        done = True
     def finish_message(evt):
         # previous_message_id = current_message.id
         # loop = get_event_loop()
         # loop.create_task(current_message.edit_text(evt.result.text))
         # current_message = None
         # loop.run_until_complete()
-        nonlocal current_message
-        current_message += evt.result.text + " "
     # def start_session(evt):
     # loop = get_event_loop()
     # loop.create_task()
     # loop.run_until_complete()
     # def finish_session(evt):
     # loop = get_event_loop()
     # loop.create_task(msg.delete())
     # loop.run_until_complete()
     # Connect callbacks to the events fired by the speech recognizer
     #speech_recognizer.recognizing.connect(update_message)
+        nonlocal transcription
+        transcription += evt.result.text + " "
     speech_recognizer.recognized.connect(finish_message)
     #speech_recognizer.session_started.connect(start_session)
     #speech_recognizer.session_stopped.connect(finish_session)
     # speech_recognizer.canceled.connect(lambda evt: logger.info('CANCELED {}'.format(evt)))
     # stop continuous recognition on either session stopped or canceled events
     speech_recognizer.session_stopped.connect(stop_cb)
-    #speech_recognizer.canceled.connect(stop_cb)
+    speech_recognizer.canceled.connect(stop_cb)
 
     # Start continuous speech recognition
     speech_recognizer.start_continuous_recognition_async()
-    await msg.edit_text('Recognizing...')
+    if update.message.voice.duration > 120:
+        await status_msg.edit_text("Recognizing. Due to audio length, this could take a few minutes. Please be patient. You'll get notified when it's completed.")
+    else:
+        await status_msg.edit_text('Recognizing...')
     while not done:
         await context.bot.send_chat_action(chat_id=chat_id,action=ChatAction.TYPING)
-        await sleep(2)
+        await sleep(5)
     speech_recognizer.stop_continuous_recognition_async()
-    await msg.edit_text(current_message)
 
+    await status_msg.delete()
+    await context.bot.send_message(chat_id=chat_id, text=transcription,
+                                   reply_to_message_id=voice_id)
+    logger.debug(f"Cleaning up, deleting {wav_file.name}")
+    clean_file(wav_file)
 
 async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
     await context.bot.send_message(chat_id=update.effective_chat.id, text="Forward me a voice message and I'll transcribe it!")
+async def help(update: Update, context: ContextTypes.DEFAULT_TYPE):
+    await context.bot.send_message(chat_id=update.effective_chat.id, text="Forward me a voice message. It may take a few minutes to transcribe. I'll reply to the voice message with a new one containing the transcription, so you can go do something else while you're waiting.")
 
 if __name__ == '__main__':
+    lg = logging.getLogger('main')
     load_dotenv()
     global speech_config
     speech_config = speechsdk.SpeechConfig(subscription=getenv('SPEECH_KEY'), region=getenv('SPEECH_REGION'))
     pers = PicklePersistence(filepath='bot.pickle')
     bot_token = getenv('TELEGRAM_BOT_TOKEN')
-    application = ApplicationBuilder().token(bot_token).persistence(persistence=pers).build()
+    application = ApplicationBuilder().token(bot_token)
+    application = application.persistence(persistence=pers)
+    application = application.build()
     start_handler = CommandHandler('start', start)
-    voice_handler = MessageHandler(VOICE, handle_voice)
+    help_handler = CommandHandler('help', help)
+    voice_handler = MessageHandler(VOICE, handle_voice, block=True)
+    if getenv('TELEGRAM_BOT_ALLOWED_CHAT_IDS'):
+        chat_ids = [int(x) for x in getenv('TELEGRAM_BOT_ALLOWED_CHAT_IDS').split(',')]
+        lg.info(f"Restricting to these chats: {chat_ids}")
+        start_handler.filters = Chat(chat_id=chat_ids)
+        help_handler.filters = Chat(chat_id=chat_ids)
+        voice_handler.filters = VOICE & Chat(chat_id=chat_ids)
     application.add_handler(start_handler)
+    application.add_handler(help_handler)
     application.add_handler(voice_handler)
 
     application.run_polling()
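
Note on the commented-out experiment in this diff: the Speech SDK fires its recognizing/recognized callbacks on its own worker thread, so they cannot await Telegram coroutines directly. A minimal sketch of one way to bridge that gap, not part of these commits and reusing the bot's status_msg and speech_recognizer names purely for illustration, is to capture the running event loop inside handle_voice and hand coroutines back to it with asyncio.run_coroutine_threadsafe:

    from asyncio import get_running_loop, run_coroutine_threadsafe

    # inside handle_voice, before wiring up the recognizer callbacks
    loop = get_running_loop()  # the loop python-telegram-bot is driving

    def update_message(evt):
        # Called from the Speech SDK's worker thread, so we cannot await here.
        # Schedule the edit on the bot's event loop instead (fire-and-forget).
        run_coroutine_threadsafe(status_msg.edit_text(evt.result.text), loop)

    speech_recognizer.recognizing.connect(update_message)

run_coroutine_threadsafe returns a concurrent.futures.Future, so the callback could also block on or inspect the result; Telegram's edit rate limits would still need handling for frequent partial results.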