import gradio as gr
from transformers import pipeline
import requests
import json
import edge_tts
from edge_tts import VoicesManager
import asyncio
import random
import tempfile
import os
import inflect
from huggingface_hub import InferenceClient
import re
import time
from streaming_stt_nemo import Model
# Map of human-readable voice labels (shown in the UI) to Edge-TTS voice IDs.
# Keys are display strings; values are the "ShortName" identifiers that
# edge_tts.Communicate expects.
Female_language_dict = {
'English-Jenny (Female)': 'en-US-JennyNeural',
'English-Ana (Female)': 'en-US-AnaNeural',
'English-Aria (Female)': 'en-US-AriaNeural',
'English-Michelle (Female)': 'en-US-MichelleNeural',
'English (Australia)-Natasha- (Female)': 'en-AU-NatashaNeural',
'English (Canada)-Clara- (Female)': 'en-CA-ClaraNeural',
'English (UK)-Libby- (Female)': 'en-GB-LibbyNeural',
'English (UK)-Maisie- (Female)': 'en-GB-MaisieNeural',
'English (UK)-Sonia- (Female)': 'en-GB-SoniaNeural',
'English (Ireland)-Emily- (Female)': 'en-IE-EmilyNeural',
}
# Speech-to-text engines, keyed by language code; only English is loaded.
# NOTE(review): Model comes from streaming_stt_nemo and is instantiated at
# import time — loading happens once, on module import.
default_lang = "en"
engines = { default_lang: Model(default_lang) }
# Hugging Face Inference client for the chat LLM (Mixtral 8x7B Instruct).
client1 = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
# Persona/system prompt prepended to every user message before generation.
system_instructions1 = "[SYSTEM] YOU must Output only plain text. Do not use **bold**, *italic*, ### headings, **number** or any other markdown-specific formatting in content. Respond as Hermione Granger from the Harry Potter series, embodying her intelligent, resourceful, and slightly bossy yet friendly demeanor. Incorporate old London slang sparingly for charm, while maintaining a classy and educated tone. Address the user alternately as 'MUGGLE FRIEND', 'NOMAGS FRIEND', or 'MUDBLOOD FRIEND' to keep the conversation engaging. Ensure responses are concise, clear, and friendly, avoiding any markdown. Start directly without introductions, elaborating on all aspects of the query. Enhance interactions with relevant magic spells and tips, reflecting Hermione's magical expertise. Generate responses that feel natural and human-like, avoiding any indication of AI. Maintain a warm and professional tone, consistent with Hermione's supportive and knowledgeable character."
def transcribe(audio):
    """Transcribe an audio file to English text via the preloaded STT engine.

    Parameters
    ----------
    audio : path-like
        Audio file accepted by the engine's ``stt_file`` method.

    Returns
    -------
    str
        The first transcript candidate returned by the engine.
    """
    engine = engines["en"]
    # stt_file returns a sequence; the transcript is its first element.
    transcript, *_ = engine.stt_file(audio)
    return transcript
def model(text):
    """Generate a Hermione-persona reply for *text* using the Mixtral endpoint.

    The system persona prompt is prepended and the model's streamed tokens
    are concatenated into a single plain-text string.
    """
    sampling_params = {
        "temperature": 0.7,
        "max_new_tokens": 512,
        "top_p": 0.95,
        "repetition_penalty": 1,
        "do_sample": True,
        "seed": 42,
    }
    full_prompt = system_instructions1 + text + "[Hermione]"
    token_stream = client1.text_generation(
        full_prompt,
        stream=True,
        details=True,
        return_full_text=False,
        **sampling_params,
    )
    # Collect non-empty token texts and join them at the end.
    pieces = [chunk.token.text for chunk in token_stream if chunk.token.text != ""]
    return "".join(pieces)
async def respond(language_code, audio):
    """Voice-to-voice turn: transcribe *audio*, generate a reply, speak it.

    Parameters
    ----------
    language_code : str
        Display label used to look up the Edge-TTS voice in
        ``Female_language_dict``.
    audio : path-like
        Recorded user audio, passed to :func:`transcribe`.

    Yields
    ------
    str
        Path to a temporary ``.wav`` file containing the synthesized reply.
        The caller is responsible for cleaning up the file (``delete=False``).
    """
    user_text = transcribe(audio)
    reply = model(user_text)
    # BUGFIX: the previous fallback "default_voice" is not a valid Edge-TTS
    # voice ID and would make synthesis fail for any unknown language_code;
    # fall back to a real voice instead.
    voice = Female_language_dict.get(language_code, "en-US-JennyNeural")
    communicate = edge_tts.Communicate(reply, voice)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        tmp_path = tmp_file.name
    # Save after the handle is closed so edge_tts writes to a free path
    # (matters on platforms where an open handle locks the file).
    await communicate.save(tmp_path)
    yield tmp_path
async def generate1(language_code, prompt):
    """Text-to-voice turn: generate a reply for *prompt* and synthesize it.

    Parameters
    ----------
    language_code : str
        Display label used to look up the Edge-TTS voice in
        ``Female_language_dict``.
    prompt : str
        The user's text input.

    Yields
    ------
    str
        Path to a temporary ``.wav`` file with the spoken reply
        (``delete=False`` — caller cleans up).
    """
    generate_kwargs = dict(
        temperature=0.7,
        max_new_tokens=512,
        top_p=0.95,
        repetition_penalty=1,
        # NOTE(review): with do_sample=False the endpoint decodes greedily and
        # temperature/top_p are ignored — kept as-is to preserve behavior.
        do_sample=False,
    )
    formatted_prompt = system_instructions1 + prompt + "[Hermione]"
    # BUGFIX: return_full_text was True, which echoes the entire system
    # persona prompt back into the stream — the TTS would then read the
    # system instructions aloud. Use False, consistent with model().
    stream = client1.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True,
        return_full_text=False)
    output = ""
    for response in stream:
        if response.token.text != "":
            output += response.token.text
    # BUGFIX: "default_voice" is not a valid Edge-TTS voice ID; fall back to
    # a real voice so unknown language_code values don't break synthesis.
    voice = Female_language_dict.get(language_code, "en-US-JennyNeural")
    communicate = edge_tts.Communicate(output, voice)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    yield tmp_path
with gr.Blocks(gr.themes.Origin()) as demo:
gr.HTML(""" """
"""