# Spaces status: Sleeping
import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Load the model and tokenizer.
model_name = "NoaiGPT/merged-llama3-8b-instruct-1720894657"

# Pick the device first so the weights can be loaded in a matching precision.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Loading in the default float32 doubles the memory needed for the weights.
# Use half precision on GPU; keep float32 on CPU, exactly as before.
model_dtype = torch.float16 if device.type == "cuda" else torch.float32

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=model_dtype)

# Move the model to the GPU if one is available.
model.to(device)
# Define the prediction function.
# NOTE(review): `spaces` is imported by this file but never used. If this app
# runs on ZeroGPU hardware, this function probably needs the `@spaces.GPU`
# decorator -- confirm against the Space's hardware settings.
def generate_text(prompt: str, max_new_tokens: int = 200) -> str:
    """Generate a continuation of *prompt* with the loaded causal LM.

    Args:
        prompt: Text to continue. An empty, whitespace-only or ``None``
            prompt short-circuits to an empty string without calling the
            model.
        max_new_tokens: Upper bound on the number of tokens generated in
            addition to the prompt.

    Returns:
        The decoded first output sequence with special tokens removed. For a
        causal LM this sequence contains the prompt followed by the
        continuation.
    """
    # Nothing to continue: avoid spending a model call on an empty prompt.
    if not prompt or not prompt.strip():
        return ""

    # Tokenize the input and move the tensors to the model's device.
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # Pass the attention mask along with the ids instead of dropping it, and
    # bound the number of *new* tokens: `max_length` also counts the prompt,
    # so a long prompt would leave no room for any generated text.
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
    )

    # Decode the single returned sequence back to text.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Build the Gradio UI: a two-line prompt textbox in, plain text out.
prompt_box = gr.Textbox(lines=2, placeholder="Enter your prompt here...")

interface = gr.Interface(
    fn=generate_text,
    inputs=prompt_box,
    outputs="text",
    title="LLaMA 3 Text Generation",
    description="Generate text using the LLaMA 3 model fine-tuned for instruction-following tasks.",
)

# Start serving the app.
interface.launch()