Spaces:
Sleeping
Sleeping
File size: 1,900 Bytes
309b067 ae43f08 fdbd451 309b067 ae43f08 fdbd451 ae43f08 a78e93c fdbd451 a78e93c ae43f08 fdbd451 a78e93c fdbd451 a78e93c fdbd451 a78e93c ae43f08 0fe9a40 ae43f08 a78e93c ae43f08 a78e93c ae43f08 309b067 ae43f08 309b067 ae43f08 fdbd451 309b067 ae43f08 309b067 0fe9a40 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
import io
import os
import wave

import gradio as gr
import numpy as np
import requests
_DEFAULT_SAMPLE_RATE = 16000  # Used only when the caller passes a bare sample array.


def _to_int16_pcm(samples):
    """Return *samples* as little-endian 16-bit PCM, the width the WAV header declares."""
    samples = np.asarray(samples)
    if np.issubdtype(samples.dtype, np.floating):
        # Assumes float audio is normalised to [-1.0, 1.0]; clipping makes
        # out-of-range peaks saturate instead of wrapping around on the cast.
        samples = np.clip(samples, -1.0, 1.0) * 32767
    elif samples.dtype == np.int32:
        # Keep the 16 most significant bits of 32-bit PCM.
        samples = samples >> 16
    return samples.astype("<i2")


def _encode_wav(samples, sample_rate):
    """Serialise int16 *samples* into an in-memory WAV file rewound to byte 0."""
    # A 2-D array is treated as (frames, channels); C-order bytes are then
    # already interleaved the way WAV expects.
    channels = samples.shape[1] if samples.ndim == 2 else 1
    buffer = io.BytesIO()
    with wave.open(buffer, "wb") as wav_file:
        wav_file.setnchannels(channels)
        wav_file.setsampwidth(2)  # 16-bit samples
        wav_file.setframerate(int(sample_rate))
        wav_file.writeframes(samples.tobytes())
    buffer.seek(0)
    return buffer


def transcribe(audio_data):
    """Transcribe recorded audio to text with Groq's transcription endpoint.

    Args:
        audio_data: One of
            * the ``(sample_rate, samples)`` tuple that
              ``gr.Audio(type="numpy")`` passes to its callback,
            * a bare NumPy sample array (treated as 16 kHz), or
            * ``None`` when nothing was recorded.

    Returns:
        The transcribed text on success, otherwise a human-readable message
        explaining why no transcription is available. Errors are returned as
        strings rather than raised so they show up in the text output.
    """
    if audio_data is None:
        return "No audio provided."

    if isinstance(audio_data, tuple) and len(audio_data) == 2:
        sample_rate, samples = audio_data
    else:
        sample_rate, samples = _DEFAULT_SAMPLE_RATE, audio_data

    samples = _to_int16_pcm(samples)
    if samples.size == 0:
        return "No audio provided."

    groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"

    # Prefer a key supplied through the environment so the secret does not
    # have to live in source; the placeholder keeps the old edit-in-place flow.
    api_key = os.environ.get("GROQ_API_KEY", "YOUR_GROQ_API_KEY")
    headers = {"Authorization": f"Bearer {api_key}"}

    files = {
        "file": ("audio.wav", _encode_wav(samples, sample_rate), "audio/wav"),
    }
    data = {
        "model": "whisper-large-v3-turbo",
        "response_format": "json",
        "language": "en",
    }

    try:
        # Without a timeout a stalled connection would block the UI forever.
        response = requests.post(
            groq_api_endpoint,
            headers=headers,
            files=files,
            data=data,
            timeout=60,
        )
    except requests.RequestException as exc:
        return f"Error: request failed, {exc}"

    if response.status_code != 200:
        return f"Error: {response.status_code}, {response.text}"

    try:
        result = response.json()
    except ValueError:
        return f"Error: unexpected response, {response.text}"
    return result.get("text", "No transcription available.")
# Build the web UI: microphone recording in, transcript text out.
iface = gr.Interface(
    title="Voice to Text Converter",
    description="Record your voice, and it will be transcribed into text using Groq API.",
    fn=transcribe,
    inputs=gr.Audio(
        type="numpy",
        source="microphone",
    ),
    outputs="text",
)

# Start serving the interface.
iface.launch()
|