"""Gradio app that generates text with the causal LM checkpoint in `model_name`."""

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
import spaces

# Load the model and tokenizer once at import time so every request reuses them.
model_name = "NoaiGPT/merged-llama3-8b-instruct-1720894657"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Upper bound on the number of tokens generated *after* the prompt.
MAX_NEW_TOKENS = 200


# Define the prediction function
@spaces.GPU
def generate_text(prompt: str) -> str:
    """Generate a continuation for *prompt* and return the decoded text.

    Args:
        prompt: Raw user text to feed to the model.

    Returns:
        The first generated sequence decoded with special tokens stripped.
        The sequence returned by ``generate`` for a causal LM starts with the
        prompt tokens, so the prompt text is part of the result.
    """
    # Tokenize the input and move the tensors to the same device as the model.
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # Pass the attention mask along with the ids so generation does not have
    # to guess which positions are padding, and bound the output with
    # max_new_tokens: max_length counts the prompt too, so a long prompt
    # would leave no room to generate anything.
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs.get("attention_mask"),
        max_new_tokens=MAX_NEW_TOKENS,
        num_return_sequences=1,
    )

    # Decode the generated text
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# Define the Gradio interface
interface = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
    outputs="text",
    title="LLaMA 3 Text Generation",
    description="Generate text using the LLaMA 3 model fine-tuned for instruction-following tasks.",
)

# Launch the interface
interface.launch()