import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import spaces

MODEL_NAME = 'NousResearch/Genstruct-7B'

# Load the model in 8-bit to cut GPU memory use (requires the bitsandbytes
# package); the tokenizer ships with Genstruct's chat template.
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map='cuda', load_in_8bit=True)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


@spaces.GPU  # request a GPU when running on Hugging Face ZeroGPU Spaces
def generate_text(title, content):
    # Genstruct's chat template expects a single message with 'title' and
    # 'content' keys, from which it builds the instruction-generation prompt.
    msg = [{'title': title, 'content': content}]
    inputs = tokenizer.apply_chat_template(msg, return_tensors='pt').cuda()
    generated = model.generate(inputs, max_new_tokens=512)[0]
    # Decode the full sequence and truncate at the first end-of-sequence token.
    return tokenizer.decode(generated).split(tokenizer.eos_token)[0]


demo = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Title"),
        gr.Textbox(label="Content", lines=5),
    ],
    outputs=gr.Textbox(label="Generated Output", lines=10),
    title="Genstruct-7B Text Generation Demo",
    description="Enter a title and content to generate text using the Genstruct-7B model.",
)

if __name__ == "__main__":
    demo.launch()
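
# Example client usage (a sketch, assuming the demo is running locally on
# Gradio's default port and that the gradio_client package is installed;
# "/predict" is the default api_name for a single-function gr.Interface):
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860/")
#   result = client.predict(
#       "Photosynthesis",                       # Title
#       "Photosynthesis is the process by ...", # Content
#       api_name="/predict",
#   )
#   print(result)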