Manishkumaryadav's picture
Update app.py
a716036 verified
raw
history blame
2.14 kB
import gradio as gr
import os
import spacy
import torch
from transformers import pipeline
import speech_recognition as sr
from gtts import gTTS
import tempfile
import base64
# Load the spaCy English model, downloading it only when it is missing.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # spacy.load raises OSError when the model package is not installed.
    # Use spaCy's own downloader rather than shelling out to a bare "python",
    # which may resolve to a different interpreter than the one running here
    # and whose failure status was previously ignored.
    from spacy.cli import download as download_spacy_model

    download_spacy_model("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")
# Load Hugging Face model (Example: Bloom or other LLM from Hugging Face)
chat_model = pipeline("text-generation", model="bigscience/bloom-560m")
# Speech-to-Text function
def transcribe_audio(audio_path):
    """Transcribe an audio file to text via ``Recognizer.recognize_google``.

    Args:
        audio_path: Path of the audio file, opened with ``sr.AudioFile``.

    Returns:
        The recognized text, or a human-readable message when the speech
        could not be understood or the recognition request failed.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_path) as source:
        audio_data = recognizer.record(source)
    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Could not understand the audio."
    except sr.RequestError as error:
        # Network/API failures used to propagate and abort the whole request;
        # report them the same way as unintelligible audio instead.
        return f"Speech recognition service is unavailable: {error}"
# AI Chat Response
def chat_with_ai(user_input):
    """Generate a reply to ``user_input`` with the module-level ``chat_model``.

    Args:
        user_input: Prompt text passed to the text-generation pipeline.

    Returns:
        The newly generated text only (the prompt is not echoed back), with
        leading and trailing whitespace removed.
    """
    response = chat_model(
        user_input,
        # Budget the reply itself: max_length also counts the prompt tokens,
        # so a long prompt would leave little or no room to generate.
        max_new_tokens=150,
        do_sample=True,
        temperature=0.7,
        # Return just the continuation instead of prompt + continuation, so
        # the user's own words are not repeated in the answer.
        return_full_text=False,
    )
    return response[0]['generated_text'].strip()
# Text-to-Speech function
def generate_speech(text):
    """Synthesize ``text`` to MP3 with gTTS and return it base64-encoded.

    Args:
        text: The English text to speak.

    Returns:
        The MP3 file contents as a base64 string (UTF-8 decoded).
    """
    tts = gTTS(text=text, lang='en')
    # The temporary file is only needed to reserve a unique path; close the
    # handle right away so it is not leaked and so the file can be reopened
    # by name on platforms that forbid opening an already-open file.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
        temp_path = temp_file.name
    try:
        tts.save(temp_path)
        with open(temp_path, "rb") as audio_file:
            return base64.b64encode(audio_file.read()).decode("utf-8")
    finally:
        # Remove the file even when saving or reading fails.
        os.unlink(temp_path)
# Chat Interface
def chat_interface(user_input, audio_file=None):
    """Answer a typed or spoken message with text and synthesized speech.

    Args:
        user_input: Text typed by the user; replaced by the transcript when
            ``audio_file`` is given.
        audio_file: Optional path to a recording to transcribe.

    Returns:
        A ``(reply_text, audio_path)`` tuple. ``audio_path`` is the path of an
        MP3 file containing the spoken reply, or ``None`` when there is
        nothing to speak.
    """
    if audio_file is not None:
        user_input = transcribe_audio(audio_file)
    # Neither text nor audio was provided: do not send an empty prompt to the
    # model or an empty string to the speech synthesizer.
    if not user_input or not user_input.strip():
        return "Please type a message or provide an audio recording.", None
    ai_response = chat_with_ai(user_input)
    if not ai_response.strip():
        return ai_response, None
    audio_bytes = base64.b64decode(generate_speech(ai_response))
    # Hand the audio output a real file path: a "data:" URI string is neither
    # a file path nor an http(s) URL, which is what a string value is taken
    # to be. The file is left on disk for the UI framework to pick up.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as speech_file:
        speech_file.write(audio_bytes)
    return ai_response, speech_file.name
# Build the Gradio UI: a text box and an audio recorder/uploader feed
# chat_interface, whose text and audio results are shown side by side.
input_components = [
    gr.Textbox(lines=2, placeholder="Type your message here..."),
    gr.Audio(sources=["microphone", "upload"], type="filepath"),
]
output_components = [
    gr.Textbox(label="AI Response"),
    gr.Audio(label="AI Voice Response"),
]
gui = gr.Interface(
    fn=chat_interface,
    inputs=input_components,
    outputs=output_components,
    title="AI Chat Assistant",
    description="An AI-powered chat assistant with text & voice input/output.",
    theme="huggingface",
)
# Start serving the interface.
gui.launch()