import gradio as gr
from huggingface_hub import InferenceClient
# Hosted-inference client for the Gemma 2 27B instruction-tuned chat model.
# NOTE(review): no token is passed, so this relies on the ambient HF credentials
# of the Space/environment — confirm rate limits for anonymous use.
client = InferenceClient("google/gemma-2-27b-it")
def generate_text(messages):
    """Run a streaming chat completion and return the full text at once.

    The stream is consumed internally and accumulated, so callers get a
    single complete string (no incremental yielding).

    Args:
        messages: Chat history as a list of ``{"role": ..., "content": ...}``
            dicts, in the format expected by ``InferenceClient.chat_completion``.

    Returns:
        The concatenated assistant response text.
    """
    generated = ""
    for token in client.chat_completion(messages, max_tokens=50, stream=True):
        content = token.choices[0].delta.content
        # Stream deltas (notably the final one) may carry content=None;
        # guarding avoids a TypeError on string concatenation.
        if content:
            generated += content
    return generated
def call_generate_text(message, history):
    """Gradio event handler: append the user turn, query the LLM, return updates.

    Args:
        message: The user's input string from the Textbox.
        history: Current chat history (list of role/content dicts, may be empty).

    Returns:
        A 2-tuple ``("", messages)`` — an empty string to clear the Textbox and
        the updated history including the assistant reply. On a RuntimeError
        from the backend, the original ``history`` is returned unchanged.
    """
    messages = list(history)
    if not messages:
        # Seed the system prompt on the first turn. (The original code tried
        # to append a dict to the user *string* when it was empty, which would
        # raise AttributeError and never reached the model anyway.)
        messages.append({"role": "system", "content": "you response around 10 words"})
    messages.append({"role": "user", "content": message})
    try:
        text = generate_text(messages)
        messages.append({"role": "assistant", "content": text})
        return "", messages
    except RuntimeError as e:
        print(f"An unexpected error occurred: {e}")
        return "", history
# Extra HTML injected into the page <head> via gr.Blocks(head=...).
# Currently an empty placeholder (presumably reserved for TTS script tags
# such as MatchaTTSEn — confirm against the deployed Space).
head = '''
'''
with gr.Blocks(title="LLM with TTS",head=head) as demo:
gr.Markdown("## Please be patient, the first response may have a delay of up to 20 seconds while loading.")
gr.Markdown("**gemma-2-27b-it/LJSpeech**.LLM and TTS models will change without notice.")
js = """
function(chatbot){
text = (chatbot[chatbot.length -1])["content"]
tts_text = window.replaceSpecialChars(text)
console.log(tts_text)
window.MatchaTTSEn(tts_text,"./models/test.txt")
}
"""
chatbot = gr.Chatbot(type="messages")
chatbot.change(None,[chatbot],[],js=js)
msg = gr.Textbox()
clear = gr.ClearButton([msg, chatbot])
gr.HTML("""