59 lines
1.9 KiB
Python
59 lines
1.9 KiB
Python
import speech_recognition as sr
|
|
import os
|
|
from pydub import AudioSegment
|
|
from pydub.silence import split_on_silence
|
|
|
|
# sound = AudioSegment.from_mp3("test.mp3")
|
|
# sound.export("test.wav", format="wav")
|
|
|
|
# Path of the input recording that the script transcribes.
fname = "ciberseguretat.wav"

# Terms of interest in the transcript (not referenced in the visible code).
keyWords = [
    'ciberseguretat',
    'hacker',
    'atac',
    'pentesting',
]

# Single recognizer instance shared by every transcription call.
r = sr.Recognizer()
|
|
|
|
def transcript_audio(audio):
    """Transcribe one audio file with the shared recognizer's Whisper backend.

    Args:
        audio: Audio source handed to ``sr.AudioFile`` (the caller in this
            file passes the path of an exported WAV chunk).

    Returns:
        The text returned by ``r.recognize_whisper`` for the whole file,
        requested with language code ``'ca'``.
    """
    # Open the file that was passed in; the previous version ignored the
    # argument and always re-read the module-level ``fname``.
    with sr.AudioFile(audio) as source:
        audio_data = r.record(source)
    # ``record`` has already loaded the audio into memory, so the file can
    # be closed before the (slow) recognition step runs.
    return r.recognize_whisper(audio_data, language='ca')
|
|
|
|
def large_audio(path, minutes=5):
    """Split an audio file into fixed-length chunks and transcribe each one.

    Every chunk is exported as ``chunk<i>.wav`` inside the
    ``audio-fixed-chunks`` directory (created if missing) and passed to
    ``transcript_audio``. Progress is reported with ``print``.

    Args:
        path: Audio file to load with ``AudioSegment.from_file``.
        minutes: Length of each chunk in minutes; may be fractional.

    Returns:
        The concatenation of all chunk transcripts, each capitalized and
        terminated with ``". "``. Chunks that raise
        ``sr.UnknownValueError`` are reported and left out.

    Raises:
        ValueError: If ``minutes`` yields a chunk length below one
            millisecond.
    """
    chunk_length_ms = int(1000 * 60 * minutes)  # convert to milliseconds
    if chunk_length_ms <= 0:
        # A zero step would make range() fail with an unrelated message and
        # a negative one would silently produce no chunks at all.
        raise ValueError("minutes must give a chunk length of at least 1 ms")

    print("Loading file")
    sound = AudioSegment.from_file(path)
    print(len(sound))

    print("Splitting file")
    chunks = [
        sound[start:start + chunk_length_ms]
        for start in range(0, len(sound), chunk_length_ms)
    ]

    folder_name = "audio-fixed-chunks"
    # exist_ok avoids the check-then-create race of isdir() + mkdir().
    os.makedirs(folder_name, exist_ok=True)

    pieces = []
    print("Starting transcription")
    total_chunks = len(chunks)
    for i, audio_chunk in enumerate(chunks, start=1):
        print(f"Chunk {i} of {total_chunks}")
        # Export the chunk into the `folder_name` directory so it can be
        # read back by the recognizer.
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav")
        # Recognize the chunk.
        try:
            text = transcript_audio(chunk_filename)
        except sr.UnknownValueError as e:
            print("Error:", str(e))
        else:
            # Only successful chunks contribute, so a failed chunk can never
            # append a stale or undefined `text`.
            pieces.append(f"{text.capitalize()}. ")

    # Return the text for all chunks that were recognized.
    return "".join(pieces)
|
|
|
|
if __name__ == "__main__":
    # Transcribe the configured recording and save the result as text.
    text = large_audio(fname)
    # Separate name so the input path held in `fname` is not overwritten.
    transcript_path = "transcript.txt"
    # Explicit UTF-8 so accented characters in the transcript are written
    # the same way regardless of the platform's default encoding.
    with open(transcript_path, 'w', encoding="utf-8") as f:
        # File objects have no `writeline` method; `write` emits the string.
        f.write(text)
|