Voice-To-Text

Sleeping

lodhrangpt committed on Nov 9, 2024

Commit

93cef8c

verified ·

1 Parent(s): fdbd451

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,34 +1,23 @@
 import gradio as gr
 import requests
-import numpy as np
-import io
-import wave
 # Function to send audio to Groq API and get transcription
-def transcribe(audio_data):
-    # Convert the NumPy audio array to bytes
-    audio_bytes = io.BytesIO()
-    # Convert NumPy array to WAV format (use appropriate rate, channels, etc.)
-    with wave.open(audio_bytes, "wb") as wf:
-        wf.setnchannels(1)  # Mono channel
-        wf.setsampwidth(2)  # 16-bit audio
-        wf.setframerate(16000)  # Assuming 16kHz sample rate
-        wf.writeframes(audio_data.tobytes())
-    audio_bytes.seek(0)  # Rewind to the beginning
     # Groq API endpoint for audio transcription
     groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
     # Replace 'YOUR_GROQ_API_KEY' with your actual Groq API key
     headers = {
-        "Authorization": "Bearer YOUR_GROQ_API_KEY",
     }
     # Prepare the files and data for the request
     files = {
-        'file': ('audio.wav', audio_bytes, 'audio/wav'),
     }
     data = {
         'model': 'whisper-large-v3-turbo',  # Specify the model to use
@@ -49,10 +38,10 @@ def transcribe(audio_data):
 # Gradio interface
 iface = gr.Interface(
     fn=transcribe,
-    inputs=gr.Audio(source="microphone", type="numpy"),  # Changed to numpy
     outputs="text",
-    title="Voice to Text Converter",
-    description="Record your voice, and it will be transcribed into text using Groq API."
 )
-iface.launch()

 import gradio as gr
 import requests
 # Function to send audio to Groq API and get transcription
+def transcribe(audio_path):
+    # Read audio file in binary mode
+    with open(audio_path, "rb") as audio_file:
+        audio_data = audio_file.read()
     # Groq API endpoint for audio transcription
     groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
     # Replace 'YOUR_GROQ_API_KEY' with your actual Groq API key
     headers = {
+        "Authorization": "Bearer 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",
     }
     # Prepare the files and data for the request
     files = {
+        'file': ('audio.wav', audio_data, 'audio/wav'),
     }
     data = {
         'model': 'whisper-large-v3-turbo',  # Specify the model to use
 # Gradio interface
 iface = gr.Interface(
     fn=transcribe,
+    inputs=gr.Audio(type="filepath"),  # Removed 'source' parameter for compatibility
     outputs="text",
+    title="Voice to Text Converter App",
 )
+iface.launch()