lodhrangpt commited on
Commit
fdbd451
·
verified ·
1 Parent(s): 3cb49c8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -9
app.py CHANGED
@@ -1,23 +1,34 @@
1
  import gradio as gr
2
  import requests
 
 
 
3
 
4
  # Function to send audio to Groq API and get transcription
5
- def transcribe(audio_path):
6
- # Read audio file in binary mode
7
- with open(audio_path, "rb") as audio_file:
8
- audio_data = audio_file.read()
 
 
 
 
 
 
 
 
9
 
10
  # Groq API endpoint for audio transcription
11
  groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
12
-
13
  # Replace 'YOUR_GROQ_API_KEY' with your actual Groq API key
14
  headers = {
15
- "Authorization": "Bearer [REDACTED — leaked Groq API key removed; rotate this credential]",
16
  }
17
-
18
  # Prepare the files and data for the request
19
  files = {
20
- 'file': ('audio.wav', audio_data, 'audio/wav'),
21
  }
22
  data = {
23
  'model': 'whisper-large-v3-turbo', # Specify the model to use
@@ -38,7 +49,7 @@ def transcribe(audio_path):
38
  # Gradio interface
39
  iface = gr.Interface(
40
  fn=transcribe,
41
- inputs=gr.Audio(type="filepath"), # Removed 'source' parameter for compatibility
42
  outputs="text",
43
  title="Voice to Text Converter",
44
  description="Record your voice, and it will be transcribed into text using Groq API."
 
1
  import gradio as gr
2
  import requests
3
+ import numpy as np
4
+ import io
5
+ import wave
6
 
7
  # Function to send audio to Groq API and get transcription
8
+ def transcribe(audio_data):
9
+ # Convert the NumPy audio array to bytes
10
+ audio_bytes = io.BytesIO()
11
+
12
+ # Convert NumPy array to WAV format (use appropriate rate, channels, etc.)
13
+ with wave.open(audio_bytes, "wb") as wf:
14
+ wf.setnchannels(1) # Mono channel
15
+ wf.setsampwidth(2) # 16-bit audio
16
+ wf.setframerate(16000) # Assuming 16kHz sample rate
17
+ wf.writeframes(audio_data.tobytes())
18
+
19
+ audio_bytes.seek(0) # Rewind to the beginning
20
 
21
  # Groq API endpoint for audio transcription
22
  groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
23
+
24
  # Replace 'YOUR_GROQ_API_KEY' with your actual Groq API key
25
  headers = {
26
+ "Authorization": "Bearer YOUR_GROQ_API_KEY",
27
  }
28
+
29
  # Prepare the files and data for the request
30
  files = {
31
+ 'file': ('audio.wav', audio_bytes, 'audio/wav'),
32
  }
33
  data = {
34
  'model': 'whisper-large-v3-turbo', # Specify the model to use
 
49
  # Gradio interface
50
  iface = gr.Interface(
51
  fn=transcribe,
52
+ inputs=gr.Audio(source="microphone", type="numpy"), # Changed to numpy
53
  outputs="text",
54
  title="Voice to Text Converter",
55
  description="Record your voice, and it will be transcribed into text using Groq API."