Spaces:

ariankhalfani
/

RobertaSpeak

Runtime error

App Files Files Community

ariankhalfani committed on Jun 16, 2024

Commit

0f1374b

verified ·

1 Parent(s): 5530ee1

Update app.py

Browse files

Files changed (1) hide show

app.py +108 -54

app.py CHANGED Viewed

@@ -1,79 +1,133 @@
 import requests
-from pydub import AudioSegment
-from io import BytesIO
 import gradio as gr
 import os
 # Hugging Face API URLs
 API_URL_ROBERTA = "https://api-inference.huggingface.co/models/deepset/roberta-base-squad2"
 API_URL_TTS = "https://api-inference.huggingface.co/models/espnet/english_male_ryanspeech_tacotron"
 # Function to query the RoBERTa model
-def query_roberta(api_token, prompt, context):
-    payload = {
-        "inputs": {
-            "question": prompt,
-            "context": context
-        }
-    }
-    headers = {"Authorization": f"Bearer {api_token}"}
-    response = requests.post(API_URL_ROBERTA, headers=headers, json=payload)
-    try:
-        response.raise_for_status()  # Raise an error for bad responses
-        return response.json()
-    except requests.exceptions.HTTPError as e:
-        return {"error": f"HTTP error occurred: {e}"}
-    except ValueError as e:
-        return {"error": f"Value error occurred: {e}"}
-    except Exception as e:
-        return {"error": f"An unexpected error occurred: {e}"}
 # Function to generate speech from text using ESPnet TTS
-def generate_speech(api_token, answer):
-    payload = {
-        "inputs": answer,
-    }
-    headers = {"Authorization": f"Bearer {api_token}"}
-    response = requests.post(API_URL_TTS, headers=headers, json=payload)
-    try:
-        response.raise_for_status()  # Raise an error for bad responses
-        audio = response.content
-        audio_segment = AudioSegment.from_file(BytesIO(audio), format="flac")
-        audio_file_path = "/tmp/answer.wav"
-        audio_segment.export(audio_file_path, format="wav")
-        return audio_file_path
-    except requests.exceptions.HTTPError as e:
-        print(f"HTTP error occurred: {e}")
-        return None
-    except Exception as e:
-        print(f"An unexpected error occurred: {e}")
-        return None
-# Function to interface with Gradio
-def gradio_interface(api_token, context, prompt):
-    answer = query_roberta(api_token, prompt, context)
-    if 'error' in answer:
-        return answer['error'], None
-    answer_text = answer.get('answer', 'No answer found')
-    audio_file_path = generate_speech(api_token, answer_text)
-    return answer_text, audio_file_path
 # Define the Gradio interface
 iface = gr.Interface(
-    fn=gradio_interface,
     inputs=[
-        gr.Textbox(type="password", lines=1, label="Hugging Face API Token", placeholder="Enter your Hugging Face API token here..."),
         gr.Textbox(lines=2, label="Context", placeholder="Enter the context here..."),
-        gr.Textbox(lines=1, label="Question", placeholder="Enter your question here...")
     ],
     outputs=[
         gr.Textbox(label="Answer"),
-        gr.Audio(label="Answer as Speech", type="filepath")  # Changed to filepath type
     ],
     title="Chat with Roberta with Voice",
-    description="Ask questions based on a provided context using the Roberta model and hear the response via text-to-speech."
 )
 # Launch the Gradio app
-if __name__ == "__main__":
-    iface.launch(share=True)

 import requests
 import gradio as gr
 import os
+from pydub import AudioSegment
+from io import BytesIO
+import time
 # Hugging Face API URLs
 API_URL_ROBERTA = "https://api-inference.huggingface.co/models/deepset/roberta-base-squad2"
 API_URL_TTS = "https://api-inference.huggingface.co/models/espnet/english_male_ryanspeech_tacotron"
+API_URL_WHISPER = "https://api-inference.huggingface.co/models/openai/whisper-large-v2"
+# Hugging Face API token, read from the API_KEY environment variable
+API_TOKEN = os.getenv("API_KEY")
+# Attach an Authorization header only when a token is configured; formatting a
+# missing token would otherwise send the literal header "Bearer None".
+HEADERS = {"Authorization": f"Bearer {API_TOKEN}"} if API_TOKEN else {}
+if not API_TOKEN:
+    print("Warning: API_KEY is not set; Hugging Face requests will be sent without authentication.")
+# Retry settings
+MAX_RETRIES = 5  # maximum attempts made by each retry loop
+RETRY_DELAY = 1  # seconds to wait between attempts
+# Function to query the Whisper model for audio transcription
+def query_whisper(audio_path):
+    """Transcribe an audio file by posting its bytes to the Whisper endpoint.
+
+    Args:
+        audio_path: Path of the audio file to upload.
+
+    Returns:
+        The decoded JSON response on success, or ``{"error": message}`` when
+        the input is unusable or no attempt succeeds.
+    """
+    # Input problems are permanent, so report them without entering the retry loop.
+    if not audio_path:
+        print("Whisper model query failed: Audio file path is None")
+        return {"error": "Audio file path is None"}
+    if not os.path.exists(audio_path):
+        print(f"Whisper model query failed: Audio file does not exist: {audio_path}")
+        return {"error": f"Audio file does not exist: {audio_path}"}
+    try:
+        # Read once; the payload is identical for every attempt.
+        with open(audio_path, "rb") as f:
+            data = f.read()
+    except OSError as e:
+        print(f"Whisper model query failed: {e}")
+        return {"error": str(e)}
+    last_error = "Whisper model query was not attempted"
+    for attempt in range(MAX_RETRIES):
+        try:
+            # Bound the wait so a stalled connection cannot hang the app.
+            response = requests.post(API_URL_WHISPER, headers=HEADERS, data=data, timeout=120)
+            response.raise_for_status()
+            return response.json()
+        except (requests.RequestException, ValueError) as e:
+            print(f"Whisper model query failed: {e}")
+            last_error = str(e)
+            status = getattr(getattr(e, "response", None), "status_code", None)
+            # A 4xx reply (other than 408/429) will not change on retry.
+            if status is not None and 400 <= status < 500 and status not in (408, 429):
+                break
+            if attempt < MAX_RETRIES - 1:
+                print(f"Retrying Whisper model query ({attempt + 1}/{MAX_RETRIES})...")
+                time.sleep(RETRY_DELAY)
+    return {"error": last_error}
 # Function to query the RoBERTa model
+def query_roberta(prompt, context):
+    """Ask the RoBERTa question-answering endpoint a question about a context.
+
+    Args:
+        prompt: The question text.
+        context: The passage the answer should be extracted from.
+
+    Returns:
+        The decoded JSON response on success, or ``{"error": message}`` when
+        no attempt succeeds.
+    """
+    payload = {"inputs": {"question": prompt, "context": context}}
+    last_error = "RoBERTa model query was not attempted"
+    for attempt in range(MAX_RETRIES):
+        try:
+            # Bound the wait so a stalled connection cannot hang the app.
+            response = requests.post(API_URL_ROBERTA, headers=HEADERS, json=payload, timeout=120)
+            response.raise_for_status()
+            return response.json()
+        except (requests.RequestException, ValueError) as e:
+            print(f"RoBERTa model query failed: {e}")
+            last_error = str(e)
+            status = getattr(getattr(e, "response", None), "status_code", None)
+            # A 4xx reply (other than 408/429) will not change on retry.
+            if status is not None and 400 <= status < 500 and status not in (408, 429):
+                break
+            if attempt < MAX_RETRIES - 1:
+                print(f"Retrying RoBERTa model query ({attempt + 1}/{MAX_RETRIES})...")
+                time.sleep(RETRY_DELAY)
+    return {"error": last_error}
 # Function to generate speech from text using ESPnet TTS
+def generate_speech(answer):
+    """Synthesize speech for a text via the TTS endpoint and save it as WAV.
+
+    The response body is decoded as FLAC and re-exported to a fixed WAV path.
+
+    Args:
+        answer: The text to convert to speech.
+
+    Returns:
+        The path of the written WAV file (a ``str``) on success, or
+        ``{"error": message}`` when no attempt succeeds. Callers must tell the
+        two apart by type.
+    """
+    payload = {"inputs": answer}
+    audio_file_path = "/tmp/answer.wav"
+    last_error = "ESPnet TTS query was not attempted"
+    for attempt in range(MAX_RETRIES):
+        try:
+            # Bound the wait so a stalled connection cannot hang the app.
+            response = requests.post(API_URL_TTS, headers=HEADERS, json=payload, timeout=120)
+            response.raise_for_status()
+            audio_segment = AudioSegment.from_file(BytesIO(response.content), format="flac")
+            audio_segment.export(audio_file_path, format="wav")
+            return audio_file_path
+        except Exception as e:  # request and audio-conversion errors share no common base
+            print(f"ESPnet TTS query failed: {e}")
+            last_error = str(e)
+            status = getattr(getattr(e, "response", None), "status_code", None)
+            # A 4xx reply (other than 408/429) will not change on retry.
+            if status is not None and 400 <= status < 500 and status not in (408, 429):
+                break
+            if attempt < MAX_RETRIES - 1:
+                print(f"Retrying ESPnet TTS query ({attempt + 1}/{MAX_RETRIES})...")
+                time.sleep(RETRY_DELAY)
+    return {"error": last_error}
+# Function to handle the entire process
+def handle_all(context, audio):
+    """Run the pipeline: transcribe the audio, answer the question, speak it.
+
+    Each stage already retries its own request, so the stages run once here.
+    Repeating the whole pipeline would multiply those retries and redo stages
+    that had already succeeded.
+
+    Args:
+        context: The passage the answer should be extracted from.
+        audio: Path of the recorded question audio.
+
+    Returns:
+        A ``(text, audio_path)`` tuple. ``text`` is the answer, or an error
+        message if transcription or question answering failed. ``audio_path``
+        is the synthesized WAV path, or ``None`` when no audio was produced.
+    """
+    # Step 1: Transcribe audio
+    transcription = query_whisper(audio)
+    if not isinstance(transcription, dict):
+        message = f"Unexpected transcription response: {transcription!r}"
+        print(f"Process failed: {message}")
+        return message, None
+    if 'error' in transcription:
+        print(f"Process failed: {transcription['error']}")
+        return str(transcription['error']), None
+    question = transcription.get("text", "No transcription found")
+    # Step 2: Get answer from RoBERTa
+    answer = query_roberta(question, context)
+    if not isinstance(answer, dict):
+        message = f"Unexpected answer response: {answer!r}"
+        print(f"Process failed: {message}")
+        return message, None
+    if 'error' in answer:
+        print(f"Process failed: {answer['error']}")
+        return str(answer['error']), None
+    # Fall back when the key is missing OR the answer is empty, so an empty
+    # string is never passed on to speech synthesis.
+    answer_text = answer.get('answer') or 'No answer found'
+    # Step 3: Generate speech from answer
+    speech = generate_speech(answer_text)
+    # Failure is signalled by a dict; check the type, because `'error' in path`
+    # on a string is only a substring test.
+    if not isinstance(speech, str):
+        reason = speech.get('error') if isinstance(speech, dict) else speech
+        print(f"Process failed: {reason}")
+        # Keep the text answer even though no audio could be produced.
+        return answer_text, None
+    return answer_text, speech
 # Define the Gradio interface
 iface = gr.Interface(
+    fn=handle_all,
     inputs=[
         gr.Textbox(lines=2, label="Context", placeholder="Enter the context here..."),
+        gr.Audio(type="filepath", label="Record your voice")
     ],
     outputs=[
         gr.Textbox(label="Answer"),
+        gr.Audio(label="Answer as Speech", type="filepath")
     ],
     title="Chat with Roberta with Voice",
+    description="Record your voice, get the transcription, use it as a question for the Roberta model, and hear the response via text-to-speech."
 )
 # Launch the Gradio app
+# Guarded so that importing this module does not start the server.
+if __name__ == "__main__":
+    iface.launch()