lodhrangpt commited on
Commit
fdbd451
·
verified ·
1 Parent(s): 3cb49c8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -9
app.py CHANGED
@@ -1,23 +1,34 @@
1
  import gradio as gr
2
  import requests
 
 
 
3
 
4
  # Function to send audio to Groq API and get transcription
5
- def transcribe(audio_path):
6
- # Read audio file in binary mode
7
- with open(audio_path, "rb") as audio_file:
8
- audio_data = audio_file.read()
 
 
 
 
 
 
 
 
9
 
10
  # Groq API endpoint for audio transcription
11
  groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
12
-
13
  # Replace 'YOUR_GROQ_API_KEY' with your actual Groq API key
14
  headers = {
15
- "Authorization": "Bearer [REDACTED — leaked Groq API key removed; rotate this credential]",
16
  }
17
-
18
  # Prepare the files and data for the request
19
  files = {
20
- 'file': ('audio.wav', audio_data, 'audio/wav'),
21
  }
22
  data = {
23
  'model': 'whisper-large-v3-turbo', # Specify the model to use
@@ -38,7 +49,7 @@ def transcribe(audio_path):
38
  # Gradio interface
39
  iface = gr.Interface(
40
  fn=transcribe,
41
- inputs=gr.Audio(type="filepath"), # Removed 'source' parameter for compatibility
42
  outputs="text",
43
  title="Voice to Text Converter",
44
  description="Record your voice, and it will be transcribed into text using Groq API."
 
1
  import gradio as gr
2
  import requests
3
+ import numpy as np
4
+ import io
5
+ import wave
6
 
7
  # Function to send audio to Groq API and get transcription
8
+ def transcribe(audio_data):
9
+ # Convert the NumPy audio array to bytes
10
+ audio_bytes = io.BytesIO()
11
+
12
+ # Convert NumPy array to WAV format (use appropriate rate, channels, etc.)
13
+ with wave.open(audio_bytes, "wb") as wf:
14
+ wf.setnchannels(1) # Mono channel
15
+ wf.setsampwidth(2) # 16-bit audio
16
+ wf.setframerate(16000) # Assuming 16kHz sample rate
17
+ wf.writeframes(audio_data.tobytes())
18
+
19
+ audio_bytes.seek(0) # Rewind to the beginning
20
 
21
  # Groq API endpoint for audio transcription
22
  groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
23
+
24
  # Replace 'YOUR_GROQ_API_KEY' with your actual Groq API key
25
  headers = {
26
+ "Authorization": "Bearer YOUR_GROQ_API_KEY",
27
  }
28
+
29
  # Prepare the files and data for the request
30
  files = {
31
+ 'file': ('audio.wav', audio_bytes, 'audio/wav'),
32
  }
33
  data = {
34
  'model': 'whisper-large-v3-turbo', # Specify the model to use
 
49
  # Gradio interface
50
  iface = gr.Interface(
51
  fn=transcribe,
52
+ inputs=gr.Audio(source="microphone", type="numpy"), # Changed to numpy
53
  outputs="text",
54
  title="Voice to Text Converter",
55
  description="Record your voice, and it will be transcribed into text using Groq API."