lodhrangpt committed on
Commit
93cef8c
·
verified ·
1 Parent(s): fdbd451

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -23
app.py CHANGED
@@ -1,34 +1,23 @@
1
  import gradio as gr
2
  import requests
3
- import numpy as np
4
- import io
5
- import wave
6
 
7
  # Function to send audio to Groq API and get transcription
8
- def transcribe(audio_data):
9
- # Convert the NumPy audio array to bytes
10
- audio_bytes = io.BytesIO()
11
-
12
- # Convert NumPy array to WAV format (use appropriate rate, channels, etc.)
13
- with wave.open(audio_bytes, "wb") as wf:
14
- wf.setnchannels(1) # Mono channel
15
- wf.setsampwidth(2) # 16-bit audio
16
- wf.setframerate(16000) # Assuming 16kHz sample rate
17
- wf.writeframes(audio_data.tobytes())
18
-
19
- audio_bytes.seek(0) # Rewind to the beginning
20
 
21
  # Groq API endpoint for audio transcription
22
  groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
23
-
24
  # Replace 'YOUR_GROQ_API_KEY' with your actual Groq API key
25
  headers = {
26
- "Authorization": "Bearer YOUR_GROQ_API_KEY",
27
  }
28
-
29
  # Prepare the files and data for the request
30
  files = {
31
- 'file': ('audio.wav', audio_bytes, 'audio/wav'),
32
  }
33
  data = {
34
  'model': 'whisper-large-v3-turbo', # Specify the model to use
@@ -49,10 +38,10 @@ def transcribe(audio_data):
49
  # Gradio interface
50
  iface = gr.Interface(
51
  fn=transcribe,
52
- inputs=gr.Audio(source="microphone", type="numpy"), # Changed to numpy
53
  outputs="text",
54
- title="Voice to Text Converter",
55
- description="Record your voice, and it will be transcribed into text using Groq API."
56
  )
57
 
58
- iface.launch()
 
1
  import gradio as gr
2
  import requests
 
 
 
3
 
4
  # Function to send audio to Groq API and get transcription
5
+ def transcribe(audio_path):
6
+ # Read audio file in binary mode
7
+ with open(audio_path, "rb") as audio_file:
8
+ audio_data = audio_file.read()
 
 
 
 
 
 
 
 
9
 
10
  # Groq API endpoint for audio transcription
11
  groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
12
+
13
  # Replace 'YOUR_GROQ_API_KEY' with your actual Groq API key
14
  headers = {
15
+ "Authorization": "Bearer YOUR_GROQ_API_KEY",  # placeholder: never commit a real key; load it from an environment variable / Space secret
16
  }
17
+
18
  # Prepare the files and data for the request
19
  files = {
20
+ 'file': ('audio.wav', audio_data, 'audio/wav'),
21
  }
22
  data = {
23
  'model': 'whisper-large-v3-turbo', # Specify the model to use
 
38
  # Gradio interface
39
  iface = gr.Interface(
40
  fn=transcribe,
41
+ inputs=gr.Audio(type="filepath"), # Removed 'source' parameter for compatibility
42
  outputs="text",
43
+ title="Voice to Text Converter App",
44
+
45
  )
46
 
47
+ iface.launch()