File size: 2,703 Bytes
b7e8009
2f3b32c
 
e450c90
2f3b32c
 
e450c90
 
 
e8674ce
3957879
e8674ce
 
 
3957879
 
d23995b
 
e450c90
 
 
4ed0b36
e450c90
 
 
 
 
d23995b
e450c90
 
 
 
 
d23995b
e450c90
 
e8674ce
e450c90
e8674ce
 
3957879
e8674ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e450c90
9e58c48
ab0e126
 
 
 
 
e450c90
a042c28
 
 
 
 
 
 
bbd8adf
a042c28
f0e7c18
a042c28
 
 
f0e7c18
d23995b
a042c28
 
f0e7c18
e450c90
288afe4
2f3b32c
 
a042c28
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import os
import edge_tts
import tempfile
import gradio as gr
from huggingface_hub import InferenceClient

# For more information on `huggingface_hub` Inference API support, please check
# the docs:
# https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference

# Shared inference client used by `respond`. The access token is read from the
# TOKEN environment variable (os.getenv returns None when it is unset).
client = InferenceClient("google/gemma-3-27b-it", token=os.getenv("TOKEN"))

# Alternative provider configuration, kept for reference:
# client = InferenceClient(
#     provider="fireworks-ai",
#     api_key=os.getenv("TOKEN"),
# )

# Module-level conversation history. A `global` statement is meaningless at
# module scope, so a plain assignment is all that is needed here.
# NOTE(review): `respond` takes its own `history` parameter, which shadows this
# name inside the function, so this list does not appear to be read there.
history = []
def _build_messages(system_message, history, message):
    """Assemble the chat-completion message list for one request.

    The list starts with the system prompt, continues with every non-empty
    user/assistant turn found in ``history`` (each entry is indexed as
    ``entry[0]`` = user text, ``entry[1]`` = assistant text), and ends with
    the new user ``message``.
    """
    messages = [{"role": "system", "content": system_message}]
    for turn in history or ():
        user_text, assistant_text = turn[0], turn[1]
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": message})
    return messages


async def respond(
    message,
    history=None,
    system_message="You are a DorjGPT, created by Dorjzodovsuren. You is a helpful assistant and always reply back in Mongolian, and only return Mongolian text within 50 words.",
    max_tokens=512,
    temperature=0.001,
    top_p=0.95,
):
    """Answer ``message`` with the chat model and yield the reply as speech.

    Args:
        message: The new user question.
        history: Optional sequence of previous turns, each indexable as
            ``(user_text, assistant_text)``. ``None`` (the default) means no
            previous turns.
        system_message: System prompt placed first in the conversation.
        max_tokens: Forwarded to ``client.chat_completion``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        A single path to a temporary audio file containing the synthesized
        reply. The file is created with ``delete=False``, so it is not removed
        by this function.
    """
    messages = _build_messages(system_message, history, message)

    # Collect the streamed reply. The loop variable is deliberately not named
    # `message`, so the caller's argument is not overwritten.
    # NOTE(review): `client.chat_completion` is iterated synchronously here, so
    # the coroutine does not yield control while the model is streaming.
    pieces = []
    for chunk in client.chat_completion(
        model="google/gemma-3-27b-it",
        messages=messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Streamed chunks may carry no choices or a delta without content
        # (e.g. the terminating chunk); concatenating None would raise.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        if token:
            pieces.append(token)
    response = "".join(pieces)

    # completion = client.chat.completions.create(
    #     model="deepseek-ai/DeepSeek-R1",
    #     messages=messages,
    #     max_tokens=500,
    # )
    # response = completion.choices[0].message.content
    # print(response)

    communicate = edge_tts.Communicate(response, voice="mn-MN-YesuiNeural")
    # Reserve a unique path, then close our handle before edge-tts writes to
    # it. edge-tts produces MP3 data, so the suffix reflects the real format.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    yield tmp_path


# UI layout: a centered heading, a hidden auto-playing audio output, a question
# textbox, and a submit button wired to `respond`.
with gr.Blocks(theme="gradio/monochrome", title="Dorj Assistant") as demo:
    # The original markup nested a second `style="` inside the style attribute
    # and used the invalid unit `3m`; both declarations now live in a single
    # well-formed attribute (`3em` assumed to be the intended size).
    gr.HTML("""
        <h1 style="text-align: center; font-size: 3em;">
        DorjGPT
        </h1>
        """)
    with gr.Column():
        # Audio output component: created with visible=False and autoplay=True,
        # and fed with the file path yielded by `respond`.
        output_audio = gr.Audio(
            label="DorjGPT",
            type="filepath",
            interactive=False,
            visible=False,
            autoplay=True,
            elem_classes="audio",
        )

        user_input = gr.Textbox(label="Question", value="What is this application?")

    with gr.Tab():
        with gr.Row():
            translate_btn = gr.Button("Submit")
            # Clicking the button sends the textbox value to `respond` and
            # routes its output to the audio component; the endpoint is
            # registered under the API name "translate".
            translate_btn.click(
                fn=respond,
                inputs=user_input,
                outputs=output_audio,
                api_name="translate",
            )

if __name__ == "__main__":
    # Configure the request queue (max_size=30) and then start the app; only
    # runs when this file is executed as a script.
    queued_app = demo.queue(max_size=30)
    queued_app.launch()