import random

import gradio as gr
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
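

# Map the precision selected in the UI onto a torch dtype and load the T5 weights.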
def load_model(model_path, dtype):
    if dtype == "fp32":
        torch_dtype = torch.float32
    elif dtype == "fp16":
        torch_dtype = torch.float16
    else:
        raise ValueError("Invalid dtype. Only 'fp32' or 'fp16' are supported.")
    model = T5ForConditionalGeneration.from_pretrained(model_path, torch_dtype=torch_dtype)
    return model
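

# Chat callback for gr.ChatInterface: invoked as fn(message, history, *additional_inputs),
# so the parameters below must appear in the same order as `additional_inputs`.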
def generate(
    prompt,
    history,
    max_new_tokens,
    repetition_penalty,
    temperature,
    top_p,
    top_k,
    seed_checkbox,
    seed,
    dtype="fp16",  # receives the Model Precision radio value
    model_path="roborovski/superprompt-v1",
):
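    # SuperPrompt-v1 is a Flan-T5 fine-tune and is used here with the Flan-T5 tokenizer.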
    tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-small")
    # Reload the model on each call so a precision change takes effect immediately.
    model = load_model(model_path, dtype)
    if torch.cuda.is_available():
        device = "cuda"
        print("Using GPU")
    else:
        device = "cpu"
        print("Using CPU")
        if dtype == "fp16":
            # Half-precision matmuls are not implemented on CPU, so fall back to fp32.
            model = model.float()
    model.to(device)
    # Fold the chat history (a list of [user, assistant] pairs) into the prompt.
    input_text = f"{prompt}, {history}"
    input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(device)
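    # Seed torch's RNG (torch.manual_seed also seeds all CUDA devices) so
    # sampling is reproducible when a fixed seed is supplied.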
    if seed_checkbox:
        seed = random.randint(1, 100000)
    torch.manual_seed(int(seed))
    outputs = model.generate(
        input_ids,
        max_new_tokens=max_new_tokens,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
    )
    # skip_special_tokens drops the <pad>/</s> markers from the decoded text.
    better_prompt = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return better_prompt
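

# The seed checkbox and seed field are created as named components so their
# visibility toggle can be wired up after the interface is built.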
seed_checkbox_input = gr.Checkbox(
    value=False,
    label="Use Random Seed",
    info="Check to use a random seed for the generation process",
)
seed_input = gr.Number(
    value=42,
    precision=0,  # seeds must be integers
    interactive=True,
    label="Seed",
    info="A starting point to initiate the generation process",
)

additional_inputs = [
    gr.Slider(
        value=512,
        minimum=250,
        maximum=512,
        step=1,
        interactive=True,
        label="Max New Tokens",
        info="The maximum number of new tokens; controls how long the output can be",
    ),
    gr.Slider(
        value=1.2,
        minimum=0,
        maximum=2,
        step=0.05,
        interactive=True,
        label="Repetition Penalty",
        info="Penalizes repeated tokens, discouraging the model from repeating itself",
    ),
    gr.Slider(
        value=0.5,
        minimum=0,
        maximum=1,
        step=0.05,
        interactive=True,
        label="Temperature",
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        value=1,
        minimum=0,
        maximum=1,  # top_p is a probability mass and must stay in [0, 1]
        step=0.05,
        interactive=True,
        label="Top P",
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        value=1,
        minimum=1,
        maximum=100,
        step=1,
        interactive=True,
        label="Top K",
        info="Higher k considers a wider range of tokens, giving more diverse outputs",
    ),
    seed_checkbox_input,
    seed_input,
    gr.Radio(
        choices=["fp32", "fp16"],
        value="fp16",
        label="Model Precision",
        info="Select the precision of the model: fp32 or fp16",
    ),
]
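

# Hide the manual seed field when a random seed is requested instead.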
def update_ui(use_random_seed):
    # gr.Number.update() was removed in Gradio 4; gr.update() is the replacement.
    return gr.update(visible=not use_random_seed)
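

# Example prompts shown below the chat box; ChatInterface expects plain message strings here.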
examples = [
    "Expand the following prompt to add more detail: A storefront with 'Text to Image' written on it.",
]
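

# Build the chat UI; the generation controls above appear as additional inputs.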
demo = gr.ChatInterface(
    fn=generate,
    chatbot=gr.Chatbot(
        show_label=False,
        show_share_button=False,
        show_copy_button=True,
        likeable=True,
        layout="panel",
    ),
    additional_inputs=additional_inputs,
    title="SuperPrompt-v1",
    description="Make your prompts more detailed!",
    examples=examples,
    concurrency_limit=20,
)

# gr.Checkbox has no `change` constructor argument, so the visibility toggle
# is attached as an event by re-entering the Blocks context after construction.
with demo:
    seed_checkbox_input.change(update_ui, inputs=seed_checkbox_input, outputs=seed_input)

demo.launch(show_api=False)