"""Transcribe a long audio file by splitting it into fixed-length chunks.

The file named by ``fname`` is cut into chunks of a few minutes each, every
chunk is exported as a WAV file and transcribed with the Whisper backend of
the ``speech_recognition`` package, and the pieces are concatenated.
"""

import os

import speech_recognition as sr
from pydub import AudioSegment
from pydub.silence import split_on_silence

# To convert an MP3 source to WAV first:
# sound = AudioSegment.from_mp3("test.mp3")
# sound.export("test.wav", format="wav")

fname = "ciberseguretat.wav"
keyWords = ['ciberseguretat', 'hacker', 'atac', 'pentesting']

# Single recognizer instance shared by every transcription call.
r = sr.Recognizer()


def transcript_audio(audio):
    """Transcribe one audio file and return the recognized text.

    Args:
        audio: Audio source handed to ``sr.AudioFile`` (``large_audio``
            passes the path of an exported WAV chunk).

    Returns:
        The text returned by ``Recognizer.recognize_whisper`` with
        ``language='ca'``.
    """
    # Open the file that was passed in, NOT the module-level ``fname``;
    # otherwise every chunk would re-transcribe the whole original recording.
    with sr.AudioFile(audio) as source:
        audio_data = r.record(source)
    return r.recognize_whisper(audio_data, language='ca')


def large_audio(path, minutes: float = 5) -> str:
    """Split a large audio file into fixed-length chunks and transcribe each.

    Each chunk is exported to ``audio-fixed-chunks/chunk<N>.wav`` (relative
    to the current working directory) and transcribed with
    ``transcript_audio``. Chunks for which transcription raises
    ``sr.UnknownValueError`` are reported on stdout and skipped.

    Args:
        path: Audio file to load with ``AudioSegment.from_file``.
        minutes: Length of each chunk in minutes; fractions are allowed.

    Returns:
        The concatenated transcript; each chunk's text is capitalized and
        followed by ``". "``.

    Raises:
        ValueError: If ``minutes`` yields a chunk length below one
            millisecond (zero or negative).
    """
    chunk_length_ms = int(1000 * 60 * minutes)  # convert to milliseconds
    if chunk_length_ms <= 0:
        # Fail before any I/O: a zero step would crash ``range`` with an
        # unhelpful message and a negative one would silently yield no chunks.
        raise ValueError(
            f"minutes must give a positive chunk length, got {minutes!r}"
        )

    print("Loading file")
    sound = AudioSegment.from_file(path)
    print(len(sound))

    print("Splitting file")
    chunks = [
        sound[start:start + chunk_length_ms]
        for start in range(0, len(sound), chunk_length_ms)
    ]

    folder_name = "audio-fixed-chunks"
    # exist_ok avoids the check-then-create race of isdir() + mkdir().
    os.makedirs(folder_name, exist_ok=True)

    pieces = []
    print("Starting transcription")
    for i, audio_chunk in enumerate(chunks, start=1):
        # Export the chunk into ``folder_name`` so it can be read back
        # as a regular audio file.
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav")

        # Recognize the chunk; an unrecognizable chunk is skipped.
        try:
            text = transcript_audio(chunk_filename)
        except sr.UnknownValueError as e:
            print("Error:", str(e))
        else:
            text = f"{text.capitalize()}. "
            print(chunk_filename, ":", text)
            pieces.append(text)

    # Text of every successfully transcribed chunk, in order.
    return "".join(pieces)


if __name__ == "__main__":
    print(large_audio(fname))