podcastFilter/app.py

import speech_recognition as sr
import os 
from pydub import AudioSegment
from pydub.silence import split_on_silence

# sound = AudioSegment.from_mp3("test.mp3")
# sound.export("test.wav", format="wav")

# Path of the recording to transcribe; passed to large_audio() by the
# script entry point at the bottom of this file.
fname = "ciberseguretat.wav"
# Topic keywords.
# NOTE(review): not referenced anywhere in the visible code -- presumably
# intended for filtering transcripts later; confirm or remove.
keyWords = ['ciberseguretat', 'hacker', 'atac', 'pentesting']

# Shared speech_recognition recognizer used by transcript_audio().
r = sr.Recognizer()

def transcript_audio(audio):
    """Transcribe a single audio file to text with Whisper.

    Args:
        audio: The audio file to transcribe; handed directly to
            ``sr.AudioFile`` (typically a path to a WAV file).

    Returns:
        The recognized text as returned by ``recognize_whisper`` with the
        language set to ``'ca'``.
    """
    # Open the file the caller asked for.  The previous version ignored this
    # argument and always read the module-level ``fname``, so every chunk
    # exported by ``large_audio`` was transcribed as the whole recording
    # instead of as its own slice.
    with sr.AudioFile(audio) as source:
        audio_data = r.record(source)
    # ``record`` has already read the audio into memory, so recognition can
    # run after the file has been closed.
    return r.recognize_whisper(audio_data, language='ca')

def large_audio(path, minutes=5):
    """Transcribe a long audio file by splitting it into fixed-length chunks.

    The recording is cut into consecutive slices of ``minutes`` minutes (the
    last slice may be shorter).  Each slice is exported as
    ``audio-fixed-chunks/chunk<N>.wav`` and transcribed with
    ``transcript_audio``.  Progress messages are printed to stdout.

    Args:
        path: Audio file to load with ``AudioSegment.from_file``.
        minutes: Length of each chunk in minutes.  May be fractional, but
            must amount to at least one millisecond.

    Returns:
        The concatenated transcript.  Every successfully recognized chunk
        contributes its text passed through ``str.capitalize`` and followed
        by ``". "``.  Chunks that raise ``sr.UnknownValueError`` are
        reported on stdout and skipped.

    Raises:
        ValueError: If ``minutes`` does not yield a positive chunk length.
    """
    # Validate before doing any expensive work.  A zero length would make
    # ``range`` fail with an obscure "arg 3 must not be zero" error after the
    # file was already loaded, and a negative one would silently produce an
    # empty transcript.
    chunk_length_ms = int(1000 * 60 * minutes)  # convert to milliseconds
    if chunk_length_ms <= 0:
        raise ValueError(
            f"minutes must yield a positive chunk length, got {minutes!r}"
        )

    print("Loading file")
    sound = AudioSegment.from_file(path)
    print(len(sound))
    print("Splitting file")
    chunks = [
        sound[start:start + chunk_length_ms]
        for start in range(0, len(sound), chunk_length_ms)
    ]

    folder_name = "audio-fixed-chunks"
    # ``exist_ok`` avoids the check-then-create race of isdir() + mkdir().
    os.makedirs(folder_name, exist_ok=True)

    # Collect the pieces and join once instead of growing a string in a loop.
    transcript_parts = []
    print("Starting transcription")
    total_chunks = len(chunks)
    for i, audio_chunk in enumerate(chunks, start=1):
        print(f"Chunk {i} of {total_chunks}")
        # Export the chunk into `folder_name` so it can be read back by the
        # recognizer.
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav")
        try:
            text = transcript_audio(chunk_filename)
        except sr.UnknownValueError as e:
            # Best effort: report the failed chunk and carry on.
            print("Error:", str(e))
        else:
            # NOTE(review): str.capitalize() also lowercases everything after
            # the first character (proper nouns included) -- confirm that is
            # the intended formatting for the transcript.
            transcript_parts.append(f"{text.capitalize()}. ")
    # Return the text for all chunks that were recognized.
    return "".join(transcript_parts)

if __name__ == "__main__":
    # Transcribe the configured recording and save the result to disk.
    text = large_audio(fname)
    # Use a dedicated name for the output path instead of rebinding the
    # module-level ``fname`` that identifies the input recording.
    transcript_path = "transcript.txt"
    # Explicit UTF-8 so accented characters in the transcript are written
    # correctly regardless of the platform's default encoding.
    with open(transcript_path, 'w', encoding="utf-8") as f:
        # File objects have no ``writeline`` method (only ``write`` and
        # ``writelines``); the previous call raised AttributeError and the
        # transcript was never saved.
        f.write(text)
inital commit 2023-10-04 15:18:18 +01:00			`import speech_recognition as sr`
			`import os`
			`from pydub import AudioSegment`
			`from pydub.silence import split_on_silence`

			`# sound = AudioSegment.from_mp3("test.mp3")`
			`# sound.export("test.wav", format="wav")`

			`fname = "ciberseguretat.wav"`
			`keyWords = ['ciberseguretat', 'hacker', 'atac', 'pentesting']`

			`r = sr.Recognizer()`

			`def transcript_audio(audio):`
			`with sr.AudioFile(fname) as source:`
			`audio_data = r.record(source)`
			`text = r.recognize_whisper(audio_data, language='ca')`
			`return(text)`

			`def large_audio(path, minutes=5):`
			`"""Splitting the large audio file into fixed interval chunks`
			`and apply speech recognition on each of these chunks"""`
			`print("Loading file")`
			`sound = AudioSegment.from_file(path)`
			`print(len(sound))`
			`print("Splitting file")`
			`chunk_length_ms = int(1000 * 60 * minutes) # convert to milliseconds`
			`chunks = [sound[i:i + chunk_length_ms] for i in range(0, len(sound), chunk_length_ms)]`
			`folder_name = "audio-fixed-chunks"`

			`if not os.path.isdir(folder_name):`
			`os.mkdir(folder_name)`
			`whole_text = ""`
			`print("Starting transcription")`
save transcript 2023-10-04 15:48:18 +01:00			`total_chunks = len(chunks)`
inital commit 2023-10-04 15:18:18 +01:00			`for i, audio_chunk in enumerate(chunks, start=1):`
save transcript 2023-10-04 15:48:18 +01:00			`print(f"Chunk {i} of {total_chunks}")`
inital commit 2023-10-04 15:18:18 +01:00			`# export audio chunk and save it in`
			# the `folder_name` directory.
			`chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")`
			`audio_chunk.export(chunk_filename, format="wav")`
			`# recognize the chunk`
			`try:`
			`text = transcript_audio(chunk_filename)`
			`except sr.UnknownValueError as e:`
			`print("Error:", str(e))`
			`else:`
			`text = f"{text.capitalize()}. "`
save transcript 2023-10-04 15:48:18 +01:00			`# print(chunk_filename, ":", text)`
inital commit 2023-10-04 15:18:18 +01:00			`whole_text += text`
			`# return the text for all chunks detected`
			`return whole_text`

			`if __name__=="__main__":`
save transcript 2023-10-04 15:48:18 +01:00			`text = large_audio(fname)`
			`fname = "transcript.txt"`
			`with open(fname, 'w') as f:`
			`f.writeline(text)`