# Hugging Face Space: arshadrana/voice-to-text
# (Space status when captured: Sleeping; file size when captured: 1,542 bytes)
# Commit hashes listed alongside this file on the page:
#   309b067, ae43f08, ebcc9f2, a78e93c, 0fe9a40

import os

import gradio as gr
import requests

# Function to send audio to Groq API and get transcription
def transcribe(audio_path):
    """Transcribe a recorded audio file to text via the Groq API.

    The API key is read from the ``GROQ_API_KEY`` environment variable so
    that no credential is ever stored in the source file.

    Args:
        audio_path: Filesystem path of the audio file to transcribe. May be
            ``None`` or empty when nothing was recorded.

    Returns:
        The transcribed text on success; otherwise a human-readable message
        starting with ``"Error:"`` that describes what went wrong. Failures
        are reported as strings (not raised) because the result is shown
        directly in a text output.
    """
    # Guard first: with no recording there is nothing to open or upload.
    if not audio_path:
        return "Error: No audio was provided."

    # Never hard-code the key; take it from the process environment.
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        return "Error: The GROQ_API_KEY environment variable is not set."

    # Read audio file in binary mode
    try:
        with open(audio_path, "rb") as audio_file:
            audio_data = audio_file.read()
    except OSError as exc:
        return f"Error: Could not read audio file: {exc}"

    # Groq API endpoint for audio transcription
    groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"

    headers = {
        "Authorization": f"Bearer {api_key}",
    }

    # Prepare the files and data for the request
    files = {
        'file': ('audio.wav', audio_data, 'audio/wav'),
    }
    data = {
        'model': 'whisper-large-v3-turbo',  # Specify the model to use
        'response_format': 'json',          # Desired response format
        'language': 'en',                   # Language of the audio
    }

    # Send audio to Groq API. Without a timeout, requests waits forever on a
    # stalled connection and the UI would hang.
    try:
        response = requests.post(
            groq_api_endpoint,
            headers=headers,
            files=files,
            data=data,
            timeout=60,
        )
    except requests.RequestException as exc:
        return f"Error: Request to Groq API failed: {exc}"

    # Parse response
    if response.status_code != 200:
        return f"Error: {response.status_code}, {response.text}"

    try:
        result = response.json()
    except ValueError:
        # A 200 response whose body is not valid JSON.
        return "Error: Groq API returned a response that is not valid JSON."

    if not isinstance(result, dict):
        return "No transcription available."
    return result.get("text", "No transcription available.")

# Gradio interface
# The interface object is built at module level so `iface` remains available
# to anything that imports this module.
# NOTE(review): `source="microphone"` is the keyword this file was written
# with; newer Gradio releases appear to use `sources=["microphone"]` instead.
# Confirm against the installed Gradio version before changing it.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs="text",
    title="Voice to Text Converter",
    description="Record your voice, and it will be transcribed into text using Groq API."
)

# Start the web server only when this file is executed as a script, so that
# importing the module does not launch it as a side effect.
if __name__ == "__main__":
    iface.launch()