File size: 2,985 Bytes
2f3b32c
 
 
 
 
 
 
 
 
ab0e126
 
 
 
 
2f3b32c
3adcf48
 
2f3b32c
d23995b
ab0e126
d23995b
 
 
 
 
 
 
 
 
 
 
2f3b32c
 
ace0051
2f3b32c
 
 
 
 
d23995b
 
ab0e126
 
d23995b
ab0e126
 
 
174fb8e
d23995b
 
ab0e126
 
 
 
 
 
a042c28
 
 
 
 
 
 
bbd8adf
a042c28
 
 
 
 
d23995b
a042c28
 
ab0e126
174fb8e
288afe4
2f3b32c
 
a042c28
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import os
import re
import gradio as gr
import edge_tts
import asyncio
import time
import tempfile
from huggingface_hub import InferenceClient

# Markdown heading strings labelling model tiers. They are not referenced by
# the code visible in this file.
Fast = "## Fastest Model"

Complex = "## Best in Complex Question"

Detail = "## Best for Detailed Generation or Long Answers"

# Hugging Face inference client used for every text-generation request.
client1 = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
# Alternative model, kept for quick switching:
#client1 = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")

# System prompt text; format_prompt places it at the start of every prompt.
system_instructions1 = "<s>[SYSTEM] You are AI assistant named DorjGPT, Keep conversation very short, clear, friendly and concise. The text provided is a request for a specific type of response from you, the virtual assistant. The request asks you to provide friendly responses as if super interlligent AI assistant. The expectation is that I will avoid introductions and start answering the query directly, elaborating on all aspects of the request. As an AI-powered assistant, your task is to generate responses that appear as if they are created by the user and not an AI-powered assistant. [USER]"

# Module-level conversation list. A `global` statement has no effect at module
# scope, so the name is simply assigned here. The functions visible in this
# file take `history` as a parameter, which shadows this name inside them.
history = []
def format_prompt(message, history):
    """Assemble the full text prompt sent to the model.

    The result is the module-level system instructions, followed by every
    past exchange in ``history`` rendered as
    ``"[INST] <user> [/INST] <bot></s> "``, followed by the new ``message``
    wrapped in a final ``[INST] ... [/INST]`` pair.

    Args:
        message: The new user message to append.
        history: Iterable of ``(user_prompt, bot_response)`` pairs.

    Returns:
        The concatenated prompt string.
    """
    past_turns = "".join(
        f"[INST] {user_prompt} [/INST] {bot_response}</s> "
        for user_prompt, bot_response in history
    )
    return f"{system_instructions1}{past_turns}[INST] {message} [/INST]"

def _collect_completion(formatted_prompt, generate_kwargs):
    """Run the blocking streaming request and return the joined token text."""
    stream = client1.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
    return "".join(response.token.text for response in stream)


async def generate1(prompt, history=None, b=None):
    """Generate a spoken reply for ``prompt`` and yield the audio file path.

    The prompt is formatted with ``format_prompt``, sent to ``client1``, and
    the resulting text is synthesised with edge-tts into a temporary file.

    Args:
        prompt: The user's text prompt.
        history: Optional list of ``[user_prompt, bot_response]`` pairs. When
            given, the new exchange is appended to it in place. When omitted,
            a fresh list is used for this call; the previous ``[]`` default
            was a single list shared by every call, so it grew without bound
            and mixed the conversations of all users.
        b: Unused; kept so existing callers that pass it keep working.

    Yields:
        The path of the temporary audio file containing the spoken reply.
    """
    if history is None:
        history = []

    generate_kwargs = dict(
        temperature=0.6,
        max_new_tokens=256,
        top_p=0.95,
        repetition_penalty=1,
        do_sample=True,
        seed=42,
    )
    # NOTE(review): format_prompt already starts the prompt with
    # system_instructions1, so the instructions appear twice in the request.
    # Left unchanged here because altering the prompt changes model output.
    formatted_prompt = format_prompt(f"{system_instructions1}, {prompt}", history) + "[JARVIS]"

    # The inference client call is synchronous network I/O; running it in the
    # default executor keeps the event loop free while tokens stream in.
    loop = asyncio.get_running_loop()
    output = await loop.run_in_executor(
        None, _collect_completion, formatted_prompt, generate_kwargs)
    output = output.replace("</s>", "")
    history.append([prompt, output])

    # Reserve a temp path and close the handle before edge-tts writes to it,
    # so the file is not held open twice. delete=False keeps the path valid
    # after the handle is closed; nothing in this function removes the file.
    # NOTE(review): edge-tts appears to emit MP3 by default, so the ".wav"
    # suffix may not match the content - confirm before changing it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        tmp_path = tmp_file.name
    communicate = edge_tts.Communicate(output)
    await communicate.save(tmp_path)
    yield tmp_path

# Build the UI: a title, an autoplaying audio player for the spoken reply,
# a prompt textbox, and a button that runs generate1.
with gr.Blocks(theme="gradio/monochrome", title="Dorj Assistant") as demo:
    # The heading uses one well-formed style attribute. The earlier markup
    # opened a second `style="` inside the first and used the invalid unit
    # `3m`, so the font size was never applied.
    gr.HTML("""
        <h1 style="text-align: center; font-size: 3em;">
        DorjGPT
        </h1>
        """)
    with gr.Column():
        output_audio = gr.Audio(
            label="DorjGPT",
            type="filepath",
            interactive=False,
            autoplay=True,
            elem_classes="audio",
        )

        user_input = gr.Textbox(label="Prompt", value="What is Mongolia")

    with gr.Tab():
        with gr.Row():
            translate_btn = gr.Button("Response")
            # api_name stays "translate" so existing API clients keep working.
            translate_btn.click(fn=generate1, inputs=user_input,
                                outputs=output_audio, api_name="translate")

if __name__ == "__main__":
    # Enable the request queue (max_size=30) and start the app.
    demo.queue(max_size=30).launch()