|
import os |
|
import requests |
|
import google.generativeai as genai |
|
import gradio as gr |
|
from tempfile import NamedTemporaryFile |
|
from google.cloud import speech |
|
|
|
|
|
# Authenticate the Gemini SDK with the key supplied through the environment.
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

# Point the Google Cloud client at a service-account file. setdefault() keeps
# a path that the deployment has already exported instead of overwriting it
# with the placeholder below; replace the placeholder for local runs.
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    "path_to_your_google_cloud_credentials.json",
)

# Sampling settings handed to the Gemini model created below.
generation_config = {
    "temperature": 1,
    "top_p": 0.95,
    "top_k": 64,
    "max_output_tokens": 8192,
    "response_mime_type": "text/plain",
}

model = genai.GenerativeModel(
    model_name="gemini-1.5-pro",
    generation_config=generation_config,
    system_instruction="You are a human that lives on earth, and you also speak friendly and use informal language. Your name is Ath, and you speak like a 100% human.",
)

# A single module-level chat session: both the text handler and the audio
# handler send their messages through it, so the model-side conversation
# context is shared by every request this process serves.
chat_session = model.start_chat(history=[])

# ElevenLabs text-to-speech: API key from the environment, and the endpoint
# (the last path segment is the voice id) used to synthesize replies.
eleven_labs_api_key = os.getenv("ELEVEN_LABS_API_KEY")
eleven_labs_url = "https://api.elevenlabs.io/v1/text-to-speech/aEO01A4wXwd1O8GPgGlF"
|
|
|
def chat_and_tts_text(user_input, history):
    """Send a typed message to the chat session and record the exchange.

    Despite the name, this handler produces text only; no speech is
    synthesized here.

    Args:
        user_input: The text typed by the user.
        history: List of ``(user_message, bot_message)`` pairs kept in the
            Gradio state. It is extended in place.

    Returns:
        A ``(chatbot_value, state_value)`` tuple. Both items are the same
        history list, on success and on failure alike.
    """
    # Nothing to send: leave the conversation untouched instead of making a
    # request the model would reject.
    if user_input is None or not str(user_input).strip():
        return history, history

    try:
        reply = chat_session.send_message(user_input).text
    except Exception as e:
        # Show the failure as the bot's reply. The second return value feeds
        # the Gradio state, so it must stay a list: returning the error
        # string there would make the next history.append() fail.
        reply = f"Error: {e}"

    history.append((user_input, reply))
    return history, history
|
|
|
def convert_audio_to_text(audio_file, *, sample_rate_hertz=16000, language_code="en-US"):
    """Transcribe an audio clip with Google Cloud Speech-to-Text.

    Args:
        audio_file: The audio to transcribe. May be raw ``bytes`` (what
            ``gr.File(type="binary")`` passes), a filesystem path, or any
            object exposing the path through a ``.name`` attribute.
        sample_rate_hertz: Sample rate declared to the recognizer.
        language_code: Language tag declared to the recognizer.

    Returns:
        The transcript as a string. On any failure the function does not
        raise; it returns a string starting with
        ``"Error in audio to text conversion: "`` followed by the reason.
    """
    try:
        if audio_file is None:
            raise ValueError("no audio was provided")

        # Load the audio before creating the client so bad input is reported
        # without touching Google Cloud at all.
        if isinstance(audio_file, (bytes, bytearray)):
            content = bytes(audio_file)
        else:
            if isinstance(audio_file, (str, os.PathLike)):
                path = audio_file
            else:
                path = audio_file.name
            with open(path, "rb") as stream:
                content = stream.read()

        if not content:
            raise ValueError("the audio is empty")

        client = speech.SpeechClient()
        recognition_audio = speech.RecognitionAudio(content=content)
        # NOTE(review): the encoding is fixed to 16-bit linear PCM; uploads in
        # other formats (mp3, ogg, ...) will presumably be rejected or
        # mis-transcribed - confirm what the UI is expected to accept.
        config = speech.RecognitionConfig(
            encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=sample_rate_hertz,
            language_code=language_code,
        )
        response = client.recognize(config=config, audio=recognition_audio)

        # Each result covers a consecutive portion of the audio; keep the top
        # alternative of every portion rather than only the first one.
        pieces = [
            result.alternatives[0].transcript.strip()
            for result in response.results
            if result.alternatives
        ]
        transcript = " ".join(piece for piece in pieces if piece)
        if not transcript:
            raise ValueError("no speech was recognized")
        return transcript

    except Exception as e:
        return f"Error in audio to text conversion: {e}"
|
|
|
def _synthesize_speech(text):
    """Return the path of an mp3 file speaking ``text``, or None on failure."""
    # Without a key the request cannot succeed, so skip the round trip.
    if not eleven_labs_api_key:
        return None

    payload = {
        "text": text,
        "voice_settings": {
            "stability": 0,
            "similarity_boost": 0,
        },
    }
    headers = {
        "xi-api-key": eleven_labs_api_key,
        "Content-Type": "application/json",
    }

    try:
        # The timeout keeps a stalled TTS service from hanging the handler.
        tts_response = requests.post(
            eleven_labs_url, json=payload, headers=headers, timeout=60
        )
        if tts_response.status_code != 200:
            return None
        # delete=False: the file is returned by path and read after this
        # function has finished, so it must outlive the context manager.
        with NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
            temp_audio.write(tts_response.content)
            return temp_audio.name
    except (requests.RequestException, OSError):
        # Speech is best-effort: the text reply is still delivered.
        return None


def chat_and_tts_audio(audio_file, history):
    """Transcribe uploaded audio, chat with the model, and voice the reply.

    Args:
        audio_file: The uploaded audio, passed on to
            ``convert_audio_to_text`` unchanged.
        history: List of ``(user_message, bot_message)`` pairs kept in the
            Gradio state. It is extended in place.

    Returns:
        A ``(chatbot_value, state_value, audio_path)`` tuple. The first two
        items are the same history list; ``audio_path`` is the path of the
        synthesized mp3, or None when there was an error or speech synthesis
        was unavailable.
    """
    try:
        user_input = convert_audio_to_text(audio_file)

        # The transcriber reports failures as a prefixed string instead of
        # raising. Show that message rather than sending it to the model as
        # if the user had said it.
        if isinstance(user_input, str) and user_input.startswith(
            "Error in audio to text conversion:"
        ):
            history.append((None, user_input))
            return history, history, None

        response_text = chat_session.send_message(user_input).text
    except Exception as e:
        # The second return value feeds the Gradio state and must remain a
        # list, so the error is shown as a bot message in the history.
        history.append((None, f"Error: {e}"))
        return history, history, None

    # Record the exchange before attempting speech so that a TTS failure
    # cannot discard a reply the model has already produced.
    history.append((user_input, response_text))
    audio_path = _synthesize_speech(response_text)

    return history, history, audio_path
|
|
|
|
|
# Gradio front end: a chat column with text input, an audio-upload column,
# and a column that plays the synthesized reply.
with gr.Blocks() as demo:
    gr.Markdown(value="<h1 style='text-align: center;'>Chat with Ath</h1>")
    gr.Markdown(
        value="Ask any question by typing or upload an audio file to receive a response from Ath in text and audio format."
    )

    with gr.Row():
        # Conversation view plus typed input.
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Chat History")
            user_input_text = gr.Textbox(
                label="Text Input",
                placeholder="Type your question...",
            )
            submit_btn_text = gr.Button(value="Send")

        # Spoken input, supplied as an uploaded file.
        with gr.Column(scale=2):
            user_input_audio = gr.File(type="binary", label="Upload Audio")
            submit_btn_audio = gr.Button(value="Send")

        # Playback of the reply audio, given to the component as a file path.
        with gr.Column(scale=1):
            audio_output = gr.Audio(type="filepath", label="Response Audio")

    # Conversation history carried between events; starts empty.
    state = gr.State(value=[])

    # Text path: refreshes the chat view and the stored history.
    submit_btn_text.click(
        fn=chat_and_tts_text,
        inputs=[user_input_text, state],
        outputs=[chatbot, state],
    )
    # Audio path: additionally fills the audio player.
    submit_btn_audio.click(
        fn=chat_and_tts_audio,
        inputs=[user_input_audio, state],
        outputs=[chatbot, state, audio_output],
    )

demo.launch()