File size: 2,607 Bytes
c366a43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import gradio as gr
import assemblyai as aai
import re
import os
import tempfile

# Configure the AssemblyAI SDK with the API key taken from the
# ASSEMBLYAI_API_KEY environment variable. os.getenv returns None when the
# variable is not set, in which case no key is configured here.
aai.settings.api_key = os.getenv('ASSEMBLYAI_API_KEY')

def create_assembly_transcript(audio_file):
    """Transcribe an audio file with AssemblyAI, with speaker labels enabled.

    Args:
        audio_file: Either a filesystem path (``str`` or ``os.PathLike``) or
            an object that exposes its path through a ``.name`` attribute.

    Returns:
        Whatever ``aai.Transcriber().transcribe`` returns for the file.
    """
    # The UI in this file uses gr.Audio(type="filepath"), which hands over a
    # plain string path; strings have no ``.name``. Only unwrap ``.name`` for
    # objects that are not already path-like.
    if isinstance(audio_file, (str, os.PathLike)):
        audio_path = os.fspath(audio_file)
    else:
        audio_path = audio_file.name

    config = aai.TranscriptionConfig(speaker_labels=True)
    return aai.Transcriber().transcribe(audio_path, config=config)

def transcript_to_string(transcript):
    """Render the utterances of a transcript as plain text.

    Each utterance becomes a header line ``SPEAKER <speaker> <HH:MM:SS>``
    followed by the utterance text and a blank line. The timestamp is
    produced by passing ``utterance.start`` to ``format_time``.

    Args:
        transcript: Object whose ``utterances`` attribute is iterable and
            yields items with ``speaker``, ``start`` and ``text`` attributes.

    Returns:
        The concatenation of all rendered utterances (empty string when
        there are none).
    """
    rendered = []
    for utt in transcript.utterances:
        speaker_label = f"SPEAKER {utt.speaker}"
        stamp = format_time(utt.start)
        rendered.append(f"{speaker_label} {stamp}\n{utt.text}\n\n")
    return "".join(rendered)

def format_time(milliseconds):
    """Convert a duration in milliseconds to an ``HH:MM:SS`` string.

    Sub-second precision is discarded by floor division. Each field is
    zero-padded to at least two digits; the hours field grows as needed.

    Args:
        milliseconds: Duration in milliseconds (int or float).

    Returns:
        The formatted ``HH:MM:SS`` string.
    """
    whole_seconds = milliseconds // 1000
    # Peel off seconds first, then split the remaining minutes into hours.
    whole_minutes, secs = divmod(whole_seconds, 60)
    hrs, mins = divmod(whole_minutes, 60)
    return "{:02}:{:02}:{:02}".format(int(hrs), int(mins), int(secs))

def format_transcript(transcript_string):
    """Add Markdown emphasis to speaker labels and timestamps.

    Two substitutions are applied in order, both in multi-line mode:

    1. The text from the start of a line up to (but not including) the
       whitespace that precedes an ``NN:NN:NN`` timestamp is wrapped in
       ``**`` (bold).
    2. Every ``NN:NN:NN`` timestamp is wrapped in ``_`` (italic).

    Args:
        transcript_string: Plain-text transcript.

    Returns:
        The transcript with the Markdown markers inserted.
    """
    # (pattern, replacement) pairs; order matters because the second pass
    # runs on the output of the first.
    substitutions = (
        (r"^(.+?)(?=\s\d{2}:\d{2}:\d{2})", r"**\1**"),  # speaker labels
        (r"(\d{2}:\d{2}:\d{2})", r"_\1_"),  # timestamps
    )

    result = transcript_string
    for pattern, replacement in substitutions:
        result = re.compile(pattern, re.MULTILINE).sub(replacement, result)
    return result

def transcribe_audio(audio_file):
    """Transcribe an uploaded audio file and produce a Markdown transcript.

    The function always returns a 2-tuple so that it lines up with the two
    output components (text preview and downloadable file) it is wired to.

    Args:
        audio_file: The uploaded audio as supplied by the UI, or ``None``
            when nothing was uploaded.

    Returns:
        ``(text, file_path)``. On success ``text`` is the Markdown transcript
        and ``file_path`` is the path of a ``.md`` temporary file containing
        it. When no file was given or the transcript reports an error,
        ``text`` is a message for the user and ``file_path`` is ``None``.
    """
    if audio_file is None:
        return "Please upload an audio file.", None

    transcript = create_assembly_transcript(audio_file)

    if transcript.error:
        return f"An error occurred: {transcript.error}", None

    md_transcript = format_transcript(transcript_to_string(transcript))

    # delete=False keeps the file on disk after the ``with`` block so its path
    # can be handed back for download. UTF-8 is set explicitly so transcripts
    # with non-ASCII characters do not depend on the platform default encoding.
    with tempfile.NamedTemporaryFile(
        mode='w', delete=False, suffix='.md', encoding='utf-8'
    ) as temp_file:
        temp_file.write(md_transcript)
        temp_file_path = temp_file.name

    return md_transcript, temp_file_path

def launch_app():
    """Build the Gradio interface around ``transcribe_audio`` and launch it.

    The interface has one audio input (delivered as a file path) and two
    outputs: a text box previewing the transcript and a file component for
    downloading it.
    """
    audio_input = gr.Audio(type="filepath", label="Upload Audio File")
    preview_box = gr.Textbox(label="Transcript Preview", lines=10)
    download_file = gr.File(label="Download Transcript")

    app = gr.Interface(
        fn=transcribe_audio,
        inputs=audio_input,
        outputs=[preview_box, download_file],
        title="Audio Transcription App",
        description="Upload an audio file to get a transcription with speaker labels.",
    )
    app.launch()

# Launch the web UI only when this file is run as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    launch_app()