import tempfile

import gradio as gr
import edge_tts
from huggingface_hub import InferenceClient
from streaming_stt_nemo import Model

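# Edge TTS female English voices, keyed by the display label shown in the voice dropdown.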
Female_language_dict = {
 'English-Jenny (Female)': 'en-US-JennyNeural',
 'English-Ana (Female)': 'en-US-AnaNeural',
 'English-Aria (Female)': 'en-US-AriaNeural',
 'English-Michelle (Female)': 'en-US-MichelleNeural',
 'English (Australia)-Natasha- (Female)': 'en-AU-NatashaNeural',
 'English (Canada)-Clara- (Female)': 'en-CA-ClaraNeural',
 'English (UK)-Libby- (Female)': 'en-GB-LibbyNeural',
 'English (UK)-Maisie- (Female)': 'en-GB-MaisieNeural',
 'English (UK)-Sonia- (Female)': 'en-GB-SoniaNeural',
 'English (Ireland)-Emily- (Female)': 'en-IE-EmilyNeural',
}


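# Local NeMo streaming speech-to-text model, loaded once for English.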
default_lang = "en"
engines = { default_lang: Model(default_lang) }

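# Hosted Mixtral-8x7B-Instruct client plus the Hermione persona system prompt.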
client1 = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
system_instructions1 = "<s>[SYSTEM] You must output only plain text. Do not use **bold**, *italic*, ### headings, numbered lists, or any other Markdown-specific formatting in your content. Respond as Hermione Granger from the Harry Potter series, embodying her intelligent, resourceful, and slightly bossy yet friendly demeanor. Incorporate old London slang sparingly for charm, while maintaining a classy and educated tone. Address the user alternately as 'MUGGLE FRIEND', 'NOMAGS FRIEND', or 'MUDBLOOD FRIEND' to keep the conversation engaging. Keep responses concise, clear, and friendly, avoiding any Markdown. Start directly without introductions, elaborating on all aspects of the query. Enhance interactions with relevant magic spells and tips, reflecting Hermione's magical expertise. Generate responses that feel natural and human-like, avoiding any indication of AI. Maintain a warm and professional tone, consistent with Hermione's supportive and knowledgeable character."


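# Transcribe a recorded audio file to English text with the local STT engine.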
def transcribe(audio):
    lang = "en"
    model = engines[lang]
    text = model.stt_file(audio)[0]
    return text

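# Send the user's text to Mixtral with the Hermione persona prompt and collect the streamed reply.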
def model(text):
    generate_kwargs = dict(
        temperature=0.7,
        max_new_tokens=512,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
        seed=42,
    )
    
    formatted_prompt = system_instructions1 + text + "[Hermione]"
    stream = client1.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""
    for response in stream:
        if response.token.text != "</s>":  # skip the end-of-sequence token
            output += response.token.text

    return output

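# Voice tab handler: transcribe the user's audio, generate a reply, and return it as Edge TTS speech.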
async def respond(language_code, audio):
    user = transcribe(audio)
    reply = model(user)

    # Fall back to the UK Maisie voice if the selected label is not in the dict.
    voice = Female_language_dict.get(language_code, "en-GB-MaisieNeural")
    communicate = edge_tts.Communicate(reply, voice)

    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        tmp_path = tmp_file.name
        await communicate.save(tmp_path)
    yield tmp_path


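# Text tab handler: generate a reply for a typed prompt and return it as Edge TTS speech.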
async def generate1(language_code, prompt):
    generate_kwargs = dict(
        temperature=0.7,
        max_new_tokens=512,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
    )
    formatted_prompt = system_instructions1 + prompt + "[Hermione]"
    stream = client1.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""
    for response in stream:
        if response.token.text != "</s>":  # skip the end-of-sequence token
            output += response.token.text

    voice = Female_language_dict.get(language_code, "en-GB-MaisieNeural")
    communicate = edge_tts.Communicate(output, voice)

    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        tmp_path = tmp_file.name
        await communicate.save(tmp_path)
    yield tmp_path

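# Gradio UI: one tab for voice chat and one for typed questions, both answered with synthesized speech.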
with gr.Blocks(theme=gr.themes.Origin()) as demo:
    gr.HTML(""" <img src='https://huggingface.co/spaces/Isidorophp/Magic-with-Hermione-Granger/resolve/main/logo.png' alt='RJP DEV STUDIO logo' style='height:60px;'> """
            """ <center><h1> Hermione Granger 🧖‍♀️ ⚡🪄</h1></center> """
            """ <center><h3> I suggest you ask me for a spell: </h3></center> """)
        
    with gr.Tab("Talk to Hermione"):
        with gr.Group():
            user_voice = gr.Dropdown(choices=list(Female_language_dict.keys()), value="English (UK)-Maisie- (Female)", label="Select Voice for Hermione")
            us_input = gr.Audio(label="Your Voice Chat", type="filepath", interactive=True, sources=["microphone"])
            us_output = gr.Audio(label="Hermione's Response", type="filepath", interactive=False, autoplay=True, elem_classes="audio")
            gr.Interface(fn=respond, inputs=[user_voice, us_input], outputs=us_output, live=False)

    with gr.Tab("Write to Hermione"):
        with gr.Group():
            user_voice = gr.Dropdown(choices=list(Female_language_dict.keys()), value="English (UK)-Maisie- (Female)", label="Select Voice for Hermione")
            user_input = gr.TextArea(label="Your Question", value="If there is one spell that captures your role in everyone's favorite magical trio, what would it be?")
            output_audio = gr.Audio(label="Hermione's Response", type="filepath", interactive=False, autoplay=True, elem_classes="audio")
            gr.Interface(fn=generate1, inputs=[user_voice, user_input], outputs=output_audio, live=False)

             
if __name__ == "__main__":
    demo.queue(max_size=200, api_open=False).launch()