Spaces:
Sleeping
Sleeping
File size: 2,703 Bytes
b7e8009 2f3b32c e450c90 2f3b32c e450c90 e8674ce 3957879 e8674ce 3957879 d23995b e450c90 4ed0b36 e450c90 d23995b e450c90 d23995b e450c90 e8674ce e450c90 e8674ce 3957879 e8674ce e450c90 9e58c48 ab0e126 e450c90 a042c28 bbd8adf a042c28 f0e7c18 a042c28 f0e7c18 d23995b a042c28 f0e7c18 e450c90 288afe4 2f3b32c a042c28 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
import os
import edge_tts
import tempfile
import gradio as gr
from huggingface_hub import InferenceClient
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
# Shared chat client for the hosted model; the access token is read from the
# TOKEN environment variable (None when it is unset).
client = InferenceClient("google/gemma-3-27b-it", token=os.getenv("TOKEN"))

# Alternative provider configuration, currently disabled:
# client = InferenceClient(
#     provider="fireworks-ai",
#     api_key=os.getenv("TOKEN"),
# )

# Module-level conversation history. No `global` statement is needed here:
# at module scope it has no effect. `respond` declares its own `history`
# parameter, which shadows this name inside that function.
history = []
async def respond(
    message,
    history=None,
    system_message="You are a DorjGPT, created by Dorjzodovsuren. You is a helpful assistant and always reply back in Mongolian, and only return Mongolian text within 50 words.",
    max_tokens=512,
    temperature=0.001,
    top_p=0.95,
):
    """Answer ``message`` with the chat model and yield the reply as speech.

    Args:
        message: The user's current question.
        history: Optional iterable of ``(user_text, assistant_text)`` pairs
            from earlier turns. Falsy entries in a pair are skipped.
            ``None`` (the default) means no prior turns.
        system_message: System prompt placed first in the conversation.
        max_tokens: Forwarded to ``client.chat_completion``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The path of a temporary audio file containing the synthesized reply.
        The file is created with ``delete=False``, so it is not removed
        automatically.
    """
    messages = [{"role": "system", "content": system_message}]
    for turn in history or ():
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})
    messages.append({"role": "user", "content": message})

    # NOTE(review): `client` is a synchronous InferenceClient, so iterating
    # the stream here blocks the event loop until the model has finished.
    # Consider an async client or a worker thread if concurrency matters.
    parts = []
    for chunk in client.chat_completion(
        model="google/gemma-3-27b-it",
        messages=messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Streamed chunks may carry no choices or a delta whose content is
        # None (e.g. the final chunk); concatenating None would raise.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        if token:
            parts.append(token)
    response = "".join(parts)

    communicate = edge_tts.Communicate(response, voice="mn-MN-YesuiNeural")
    # edge-tts writes MP3 data, so give the file a matching extension.
    # Reserve the path first and close the handle before edge-tts writes to
    # it, so the file is not held open twice.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    yield tmp_path
# Gradio UI: a text question goes in, the spoken answer comes back as audio.
with gr.Blocks(theme="gradio/monochrome", title="Dorj Assistant") as demo:
    # Page heading. The style attribute was previously malformed (a nested
    # `style="` and the invalid length `3m`), so the font size never applied.
    gr.HTML("""
    <h1 style="text-align: center; font-size: 3em;">
    DorjGPT
    </h1>
    """)
    with gr.Column():
        # Hidden, non-interactive player that autoplays the returned file.
        output_audio = gr.Audio(
            label="DorjGPT",
            type="filepath",
            interactive=False,
            visible=False,
            autoplay=True,
            elem_classes="audio",
        )
        user_input = gr.Textbox(label="Question", value="What is this application?")
        with gr.Tab():
            with gr.Row():
                translate_btn = gr.Button("Submit")
                # Only the textbox value is passed, so `respond` runs with
                # its default history, system prompt and sampling settings.
                translate_btn.click(
                    fn=respond,
                    inputs=user_input,
                    outputs=output_audio,
                    api_name="translate",
                )
if __name__ == "__main__":
    # Enable the request queue (capped at 30 entries) and start the app.
    demo.queue(max_size=30).launch()