import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
import spaces


def load_model(model_name):
    """Load a causal LM and wrap it in a text-generation pipeline."""
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map=device,
        torch_dtype="auto",      # use the dtype the checkpoint was saved in
        trust_remote_code=True,  # some checkpoints (e.g. Phi-3) ship custom model code
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    generator = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        return_full_text=False,  # return only the newly generated text, not the prompt
        max_new_tokens=500,
        do_sample=False,         # greedy decoding for reproducible output
    )
    return generator


@spaces.GPU  # request a GPU for this call on Hugging Face ZeroGPU Spaces
def generate_text(prompt, model_name):
    # The model is (re)loaded on every request so the dropdown selection takes effect.
    generator = load_model(model_name)
    messages = [{"role": "user", "content": prompt}]
    output = generator(messages)
    return output[0]["generated_text"]


# Create the Gradio interface
demo = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter your prompt here...", label="Prompt"),
        gr.Dropdown(
            choices=[
                "Qwen/Qwen2.5-1.5B-Instruct",
                "microsoft/Phi-3-mini-4k-instruct",
                "ALLaM-AI/ALLaM-7B-Instruct-preview",
            ],
            label="Choose Model",
            value="ALLaM-AI/ALLaM-7B-Instruct-preview",
        ),
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="Text Generator",
    description="Enter a prompt and generate text using one of the available models.",
    examples=[
        ["Tell me a funny joke about chickens.", "microsoft/Phi-3-mini-4k-instruct"],
        # Arabic: "Tell me a funny joke about chickens."
        ["أخبرني نكتة مضحكة عن الدجاج.", "ALLaM-AI/ALLaM-7B-Instruct-preview"],
    ],
)

demo.launch()