Voice-To-Text

Sleeping

App Files Files Community

lodhrangpt committed on Nov 13, 2024

Commit

1bcb7e9

verified ·

1 Parent(s): 85eb5ef

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -7

app.py CHANGED Viewed

@@ -9,31 +9,39 @@ import os
 # Ensure nltk resources are downloaded
 nltk.download("punkt")
 # Upload the audio file at `audio_path` to the Groq transcription endpoint.
 # Returns the transcript text on HTTP 200, otherwise an "API Error: ..." string.
 def transcribe(audio_path):
     # Read the whole recording into memory for the multipart upload below.
     with open(audio_path, "rb") as audio_file:
         audio_data = audio_file.read()
     groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
     # NOTE(review): the bearer token is hard-coded in source; it should be
     # revoked and loaded from configuration instead.
     headers = {
         "Authorization": "Bearer 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",  # Replace with your actual API key
     }
     # Multipart payload: the upload is always labelled 'audio.wav' / 'audio/wav'.
-    files = {'file': ('audio.wav', audio_data, 'audio/wav')}
     # Form fields sent alongside the file: model, JSON response, English.
     data = {
         'model': 'whisper-large-v3-turbo',
         'response_format': 'json',
         'language': 'en',
     }
-    # Send audio to Groq API and handle errors
     # NOTE(review): no timeout is passed, and the error branch assumes the
     # response body is JSON with a dict under "error".
     response = requests.post(groq_api_endpoint, headers=headers, files=files, data=data)
     if response.status_code == 200:
         result = response.json()
-        return result.get("text", "No transcription available.")
     else:
         error_msg = response.json().get("error", {}).get("message", "Unknown error.")
-        return f"API Error: {error_msg}"
 # Function to generate notes and questions
 def generate_notes(transcript):
@@ -102,8 +110,15 @@ def create_pdf(transcript, long_questions, short_questions, mcqs):
     return pdf_path
 # Gradio interface
 iface = gr.Interface(
-    fn=transcribe,
     inputs=gr.Audio(type="filepath"),
     outputs=gr.File(label="Download PDF with Notes and Questions"),
     title="Voice to Text Converter and Notes Generator",

 # Ensure nltk resources are downloaded
 nltk.download("punkt")
# Function to send audio to Groq API and get transcription
def transcribe(audio_path):
    """Transcribe an audio file with the Groq API and build notes from it.

    The recording at ``audio_path`` is uploaded to Groq's OpenAI-compatible
    transcription endpoint. On success the transcript text is handed to
    ``generate_notes`` and that function's result is returned.

    Args:
        audio_path: Filesystem path of the recording to upload.

    Returns:
        Whatever ``generate_notes`` returns for the transcript, or ``None``
        when the API key is missing, the request fails, or the API reports
        an error (the reason is printed).

    Raises:
        OSError: If ``audio_path`` cannot be opened for reading.
    """
    import os

    # Read the key from the environment so no secret lives in the source
    # tree; any key that was previously committed must be revoked.
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        print("API Error: GROQ_API_KEY environment variable is not set.")
        return None  # Indicate failure

    # Read audio file in binary mode
    with open(audio_path, "rb") as audio_file:
        audio_data = audio_file.read()

    # Groq API endpoint for audio transcription
    groq_api_endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
    headers = {
        "Authorization": f"Bearer {api_key}",
    }
    files = {
        'file': ('audio.wav', audio_data, 'audio/wav'),
    }
    data = {
        'model': 'whisper-large-v3-turbo',
        'response_format': 'json',
        'language': 'en',
    }

    # Send audio to Groq API. The timeout keeps a stalled connection from
    # hanging the UI forever; transport failures are reported like API errors.
    try:
        response = requests.post(
            groq_api_endpoint,
            headers=headers,
            files=files,
            data=data,
            timeout=120,
        )
    except requests.RequestException as exc:
        print(f"API Error: {exc}")
        return None  # Indicate failure

    if response.status_code != 200:
        # Error bodies are not guaranteed to be JSON (e.g. a proxy's HTML
        # page) nor to carry a dict under "error", so parse defensively.
        try:
            error_msg = response.json().get("error", {}).get("message", "Unknown error.")
        except (ValueError, AttributeError):
            error_msg = f"Unknown error (HTTP {response.status_code})."
        print(f"API Error: {error_msg}")
        return None  # Indicate failure

    result = response.json()
    transcript = result.get("text", "No transcription available.")
    return generate_notes(transcript)
 # Function to generate notes and questions
 def generate_notes(transcript):
     return pdf_path
 # Gradio interface
def gradio_interface(audio_path):
    """Gradio callback: turn a recording into a downloadable PDF.

    Args:
        audio_path: Path of the recorded or uploaded audio supplied by the
            audio input; may be ``None`` when the user submits without audio.

    Returns:
        The value returned by ``transcribe`` for the recording, which is
        passed to the file output component.

    Raises:
        gr.Error: If no audio was supplied or ``transcribe`` reported failure.
    """
    if not audio_path:
        raise gr.Error("Error: No audio provided. Please record or upload an audio file.")

    pdf_path = transcribe(audio_path)
    if not pdf_path:
        # The output component is a gr.File, so returning a message string
        # would be interpreted as a file path. Raising gr.Error makes Gradio
        # show the message to the user instead.
        raise gr.Error("Error: Unable to process the audio file. Please check the API key and try again.")
    return pdf_path
 iface = gr.Interface(
+    fn=gradio_interface,
     inputs=gr.Audio(type="filepath"),
     outputs=gr.File(label="Download PDF with Notes and Questions"),
     title="Voice to Text Converter and Notes Generator",