File size: 1,324 Bytes
13d3de7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d117b91
13d3de7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import gradio as gr
from logging_config import log_buffer
from transcription_tool import TranscriptTool  # TranscriptTool is expected to live in `transcription_tool.py`

# smolagent transcription tool: one shared instance, created at import time
# and reused by transcribe_and_stream_logs for every request.
transcript_tool = TranscriptTool()


def transcribe_and_stream_logs(file):
    """Transcribe an uploaded media file and return the text plus captured logs.

    Args:
        file: Value delivered by the ``gr.File`` input. Either an object
            exposing the upload's path as ``.name``, a plain path string, or
            ``None`` when the button is clicked before anything is uploaded.

    Returns:
        A ``(transcription, logs)`` tuple matching the two output textboxes.
        When no file was provided, ``transcription`` is a short notice and
        ``logs`` is an empty string, because no transcription was attempted.
    """
    # Gradio hands over None when nothing has been uploaded; return a notice
    # instead of failing with AttributeError on `file.name`.
    if file is None:
        return "No file uploaded. Please upload an audio or video file.", ""

    # Accept both file-like objects (path stored in `.name`) and plain paths.
    upload_path = getattr(file, "name", file)

    # Perform transcription
    transcription_result = transcript_tool.forward(upload_path)

    # Rewind the shared log buffer and read everything it currently holds.
    # NOTE(review): presumably an in-memory text buffer fed by the logging
    # setup in logging_config - confirm there.
    log_buffer.seek(0)
    logs = log_buffer.read()

    return transcription_result, logs


with gr.Blocks() as app:
    # Header: page title followed by a short description of the tool.
    gr.Markdown("# TranscriptTool: Transcribe Audio/Video")
    gr.Markdown(
        "TranscriptTool is a smolagent tool used to transcribe audio and video "
        "files into text. This tool allows agents to process multimedia inputs "
        "efficiently. Can be used within a smolagent via the Hugging Face API."
    )

    # Input side: upload widget restricted to audio/video, plus the trigger.
    file_input = gr.File(
        label="Upload Audio/Video File",
        file_types=["audio", "video"],
    )
    transcribe_button = gr.Button("Transcribe")

    # Output side: one textbox for the transcript, one for the captured logs.
    transcription_output = gr.Textbox(label="Transcription", lines=10)
    log_output = gr.Textbox(label="Logs", lines=15)

    # Clicking the button sends the upload to the handler; its two return
    # values fill the two textboxes in the order listed here.
    transcribe_button.click(
        fn=transcribe_and_stream_logs,
        inputs=file_input,
        outputs=[transcription_output, log_output],
    )

# Launch the Gradio app only when this file is run as a script, so importing
# the module (e.g. to reuse `app`) does not start a server.
if __name__ == "__main__":
    app.launch()