import gradio as gr
from transformers import pipeline
import requests
import json
import edge_tts
from edge_tts import VoicesManager
import asyncio
import random
import tempfile
import os
import inflect
from huggingface_hub import InferenceClient
import re
import time
from streaming_stt_nemo import Model

# Maps the label shown in the voice dropdown to the edge-tts voice identifier
# that is passed to edge_tts.Communicate.
Female_language_dict = {
    'English-Jenny (Female)': 'en-US-JennyNeural',
    'English-Ana (Female)': 'en-US-AnaNeural',
    'English-Aria (Female)': 'en-US-AriaNeural',
    'English-Michelle (Female)': 'en-US-MichelleNeural',
    'English (Australia)-Natasha- (Female)': 'en-AU-NatashaNeural',
    'English (Canada)-Clara- (Female)': 'en-CA-ClaraNeural',
    'English (UK)-Libby- (Female)': 'en-GB-LibbyNeural',
    'English (UK)-Maisie- (Female)': 'en-GB-MaisieNeural',
    'English (UK)-Sonia- (Female)': 'en-GB-SoniaNeural',
    'English (Ireland)-Emily- (Female)': 'en-IE-EmilyNeural',
}

# Speech-to-text engines keyed by language code; only the default language is
# loaded, and it is loaded once at import time.
default_lang = "en"
engines = {default_lang: Model(default_lang)}

# Hosted text-generation endpoint used for every reply.
client1 = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

# Persona prompt prepended verbatim to every user message. The adjacent
# literals below concatenate into one string; each piece keeps its trailing
# space so the joined value is unchanged.
system_instructions1 = (
    "[SYSTEM] YOU must Output only plain text. "
    "Do not use **bold**, *italic*, ### headings, **number** or any other markdown-specific formatting in content. "
    "Respond as Hermione Granger from the Harry Potter series, embodying her intelligent, resourceful, and slightly bossy yet friendly demeanor. "
    "Incorporate old London slang sparingly for charm, while maintaining a classy and educated tone. "
    "Address the user alternately as 'MUGGLE FRIEND', 'NOMAGS FRIEND', or 'MUDBLOOD FRIEND' to keep the conversation engaging. "
    "Ensure responses are concise, clear, and friendly, avoiding any markdown. "
    "Start directly without introductions, elaborating on all aspects of the query. "
    "Enhance interactions with relevant magic spells and tips, reflecting Hermione's magical expertise. "
    "Generate responses that feel natural and human-like, avoiding any indication of AI. "
    "Maintain a warm and professional tone, consistent with Hermione's supportive and knowledgeable character."
)
# Voice used when the dropdown label is not a key of Female_language_dict.
# It is the same voice the UI preselects, so a lookup miss still yields a
# usable edge-tts voice identifier.
_FALLBACK_VOICE = "en-GB-MaisieNeural"


def transcribe(audio):
    """Return the transcript of the recording stored at path *audio*.

    Uses the speech-to-text engine registered under ``default_lang`` and
    returns the first element of its ``stt_file`` result.
    """
    stt_engine = engines[default_lang]
    return stt_engine.stt_file(audio)[0]


def _complete(prompt, return_full_text, **generate_kwargs):
    """Stream a completion for *prompt* and return the concatenated text.

    The persona prompt is prepended and the ``[Hermione]`` marker appended
    before the request is sent. Tokens flagged as special (for example an
    end-of-sequence marker) and tokens with empty text are dropped so they
    never reach the text-to-speech step.
    """
    formatted_prompt = system_instructions1 + prompt + "[Hermione]"
    stream = client1.text_generation(
        formatted_prompt,
        stream=True,
        details=True,
        return_full_text=return_full_text,
        **generate_kwargs,
    )
    pieces = []
    for chunk in stream:
        token = chunk.token
        if token.special or not token.text:
            continue
        pieces.append(token.text)
    return "".join(pieces)


async def _synthesize(text, language_code):
    """Speak *text* with the voice selected by *language_code*.

    Returns the path of a temporary audio file. The file is created with
    ``delete=False`` so it outlives this call; nothing here removes it.
    """
    voice = Female_language_dict.get(language_code, _FALLBACK_VOICE)
    communicate = edge_tts.Communicate(text, voice)
    # Only the unique path is needed: close the handle before edge-tts
    # writes to it so the file is not held open twice.
    # NOTE(review): edge-tts is believed to emit MP3 data; the ".wav" suffix
    # is kept unchanged for compatibility -- confirm before renaming.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    return tmp_path


def model(text):
    """Generate Hermione's reply to *text* using seeded sampling."""
    return _complete(
        text,
        return_full_text=False,
        temperature=0.7,
        max_new_tokens=512,
        top_p=0.95,
        repetition_penalty=1,
        do_sample=True,
        seed=42,
    )


async def respond(language_code, audio):
    """Voice chat handler: transcribe *audio*, answer it, speak the answer.

    Args:
        language_code: Dropdown label selecting the voice.
        audio: File path of the recorded question, or ``None`` when nothing
            was recorded.

    Yields:
        Path of the synthesized reply audio file.

    Raises:
        gr.Error: If no recording was supplied.
    """
    if audio is None:
        raise gr.Error("Please record a question before submitting.")
    user = transcribe(audio)
    reply = model(user)
    yield await _synthesize(reply, language_code)


async def generate1(language_code, prompt):
    """Text chat handler: answer *prompt* and speak the answer.

    Args:
        language_code: Dropdown label selecting the voice.
        prompt: The question typed by the user.

    Yields:
        Path of the synthesized reply audio file.
    """
    # Generation settings are kept exactly as before (sampling disabled,
    # return_full_text=True); only the token filtering and voice fallback
    # differ from the previous implementation.
    output = _complete(
        prompt,
        return_full_text=True,
        temperature=0.7,
        max_new_tokens=512,
        top_p=0.95,
        repetition_penalty=1,
        do_sample=False,
    )
    yield await _synthesize(output, language_code)


def _voice_dropdown():
    """Create the voice selector shared by both tabs."""
    return gr.Dropdown(
        choices=list(Female_language_dict.keys()),
        value="English (UK)-Maisie- (Female)",
        label="Select Voice for Hermione",
    )


with gr.Blocks(gr.themes.Origin()) as demo:
    # Three adjacent literals concatenate into the single HTML header string.
    gr.HTML(""" RJP DEV STUDIO logo """ """

Hermione Granger 🧖‍♀️ ⚡🪄

""" """

I suggest, you ask me for a Spell :

""")
    with gr.Tab("Talk to Hermione"):
        with gr.Group():
            user_voice = _voice_dropdown()
            us_input = gr.Audio(
                label="Your Voice Chat",
                type="filepath",
                interactive=True,
                sources="microphone",
                waveform_options=None,
            )
            us_output = gr.Audio(
                label="Hermione's Response",
                type="filepath",
                interactive=False,
                autoplay=True,
                elem_classes="audio",
            )
            gr.Interface(
                fn=respond,
                inputs=[user_voice, us_input],
                outputs=us_output,
                live=False,
            )
    with gr.Tab("Write to Hermione"):
        with gr.Group():
            user_voice = _voice_dropdown()
            user_input = gr.TextArea(
                label="Your Question",
                value="If there is any spell to encapsulate you as a piece of the cog, where Hermione is to everyone's favorite magical trio, it has to be...?",
            )
            output_audio = gr.Audio(
                label="Hermione's Response",
                type="filepath",
                interactive=False,
                autoplay=True,
                elem_classes="audio",
            )
            gr.Interface(
                fn=generate1,
                inputs=[user_voice, user_input],
                outputs=output_audio,
                live=False,
            )

if __name__ == "__main__":
    demo.queue(max_size=200, api_open=False).launch()