import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Load the model and tokenizer once at startup.
model_name = "ayyuce/SmolGRPO-135M"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=-1)  # device=-1 ensures CPU usage


def generate_text(prompt, max_new_tokens, temperature, top_p, do_sample):
    """Run the text-generation pipeline with the decoding parameters from the UI."""
    generate_kwargs = {
        "max_new_tokens": int(max_new_tokens),
        "temperature": float(temperature),
        "top_p": float(top_p),
        # temperature and top_p only take effect when sampling is enabled.
        "do_sample": do_sample == "Yes",
    }
    generated_list = generator(prompt, **generate_kwargs)
    return generated_list[0]["generated_text"]


with gr.Blocks() as demo:
    gr.Markdown("# SmolGRPO-135M Text Generator")
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(label="Prompt", lines=5, placeholder="Enter your prompt here...")
            max_new_tokens = gr.Number(label="Max New Tokens", value=256)
            temperature = gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, value=0.5)
            top_p = gr.Slider(label="Top-p (Nucleus Sampling)", minimum=0.0, maximum=1.0, value=0.9)
            do_sample = gr.Dropdown(label="Do Sample", choices=["Yes", "No"], value="Yes")
            generate_button = gr.Button("Generate Text")
        with gr.Column():
            output = gr.Textbox(label="Generated Text", lines=15)

    # Event handlers must be registered inside the Blocks context.
    generate_button.click(
        fn=generate_text,
        inputs=[prompt, max_new_tokens, temperature, top_p, do_sample],
        outputs=output,
    )

if __name__ == "__main__":
    demo.launch()
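# Example usage of generate_text outside the UI (hypothetical prompt and
# parameter values; sampled output will vary from run to run):
#
#   text = generate_text(
#       prompt="Explain gradient descent in one paragraph.",
#       max_new_tokens=128,
#       temperature=0.7,
#       top_p=0.9,
#       do_sample="Yes",
#   )
#   print(text)  # the prompt followed by the model's continuation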