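# Spaces:
#
# Tonic
# /
#
# LiteLlama
#
# Runtime error
#
# File size: 2,645 Bytes
#
# 2f369b9
#
#
#
# 5f4ec98
# 2f369b9
# 5f4ec98
#
# 2f369b9
#
#
#
#
#
#
#
#
# 14d6653
# 2647efe
# 14d6653
# 975dca5
# 2647efe
# 14d6653
# 2f369b9
# 834cadf
#
#
#
#
# a23fe1b
#
#
#
#
#
# 834cadf
#
# a23fe1b
# 2f369b9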

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr

# Markdown heading shown at the top of the Gradio page.
title = """🙋🏻‍♂️Welcome to🌟Tonic's 🦙LiteLlama📳On-Device Chat!"""

# Markdown/HTML blurb rendered under the title: links to the model card, a
# "Duplicate Space" badge, and community links. The badge anchor used to be
# followed by a stray ``</h3>`` closing tag with no matching opening tag; it
# has been removed so the embedded HTML is well formed.
description = """
You can use this Space to test out the current model [ahxt/LiteLlama-460M-1T](https://huggingface.co/ahxt/LiteLlama-460M-1T) You can also use 🦙LiteLlama📳On-Device Chat by cloning this space. Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/Tonic1/Litellama?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a>
Join us :  🌟TeamTonic is always making cool demos! Join our active🛠️builder's community on👻Discord: [Discord](https://discord.gg/nXx5wbX9) On🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On Github: [Polytonic](https://github.com/tonic-ai) & contribute to 🌟[PolyGPT](https://github.com/tonic-ai/polygpt-alpha)
"""
# Hub identifier of the checkpoint served by this demo.
model_path = 'ahxt/LiteLlama-460M-1T'

# Load the causal LM and its tokenizer from the same checkpoint id.
model = AutoModelForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# The app only runs inference, so put the module in evaluation mode.
model.eval()

# Use CUDA when torch reports it as available, otherwise fall back to CPU,
# then move the model's weights onto that device.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

def generate_text(question):
    """Generate an answer to ``question`` with the module-level model.

    The question is wrapped in a "Q: <question>" / "A:" prompt, the
    module-level ``model`` continues that prompt, and only the newly generated
    continuation is returned to the caller.

    Args:
        question: The user's question as a string (the Gradio textbox value).

    Returns:
        The generated answer with special tokens removed and surrounding
        whitespace stripped. An empty string is returned when ``question`` is
        ``None``, empty, or whitespace only, without calling the model.
    """
    # Nothing to answer: skip tokenization and generation entirely.
    if not question or not question.strip():
        return ""

    prompt = f'Q: {question}\nA:'
    encoded = tokenizer(prompt, return_tensors="pt", return_attention_mask=True)
    input_ids = encoded["input_ids"].to(device)
    attention_mask = encoded["attention_mask"].to(device)

    tokens = model.generate(
        input_ids,
        # Pass the mask explicitly; pad and EOS share an id here, so it
        # cannot be inferred reliably from the input ids alone.
        attention_mask=attention_mask,
        # Budget the *generated* tokens. ``max_length`` also counts the
        # prompt, so a long question would leave no room for an answer.
        max_new_tokens=50,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only what the model appended after the prompt, rather than
    # searching the full decoded text for an "A: " marker that may be absent
    # (no leading space emitted) or repeated (model invents more Q/A pairs).
    prompt_length = input_ids.shape[-1]
    answer = tokenizer.decode(tokens[0][prompt_length:], skip_special_tokens=True)

    # If the model starts a follow-up question of its own, keep the first answer only.
    return answer.split('\nQ:', 1)[0].strip()

# Build the page: header markdown on top, then a two-column row with the
# question box and submit button on the left and the answer box on the right.
with gr.Blocks() as iface:
    gr.Markdown(title)
    gr.Markdown(description)
    with gr.Row():
        with gr.Column():
            question = gr.Textbox(
                lines=2,
                label="Speak to LiteLlama",
                placeholder="What are the best Japanese gardens in Paris?",
            )
            submit_button = gr.Button("Submit")
        with gr.Column():
            output = gr.Textbox(lines=6, label="🦙LiteLlama")

    # Clicking the button sends the question text through generate_text and
    # writes the returned string into the answer box.
    submit_button.click(generate_text, inputs=question, outputs=output)

# Start the Gradio server for this app.
iface.launch()