import gradio
import torch  # transformers needs a torch backend for model inference
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "arnir0/Tiny-LLM"

# Load the tokenizer and model weights once at startup so requests reuse them.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)


def generate_text(prompt, model, tokenizer, max_length=4096, temperature=0.8, top_k=50, top_p=0.95):
    # Tokenize the prompt and sample a continuation from the model.
    inputs = tokenizer.encode(prompt, return_tensors="pt")
    outputs = model.generate(
        inputs,
        max_length=max_length,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        do_sample=True,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


def my_inference_function(text):
    prompt = f"Summarize the context below\n\n{text}"
    generated_text = generate_text(prompt, model, tokenizer)
    # The decoded output echoes the prompt, so strip it and return only the continuation.
    return generated_text[len(prompt):]


gradio_interface = gradio.Interface(
    fn=my_inference_function,
    inputs="text",
    outputs="text",
    examples=[
        ["Jill"],
        ["Sam"]
    ],
    title="REST API with Gradio and Huggingface Spaces",
    description="This is a demo of how to build an AI powered REST API with Gradio and Huggingface Spaces – for free! Based on [this article](https://www.tomsoderlund.com/ai/building-ai-powered-rest-api). See the **Use via API** link at the bottom of this page.",
    article="© Tom Söderlund 2022",
)
gradio_interface.launch()
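# Usage sketch (an assumption, not part of the original script): once this app is
# deployed as a Gradio 3.x Space, the "Use via API" endpoint mentioned in the
# description can be called with a plain HTTP POST. The Space URL below is a
# hypothetical placeholder; Gradio 3.x exposes the prediction route at /run/predict
# and wraps inputs and outputs in a "data" list.
#
#   import requests
#
#   response = requests.post(
#       "https://your-username-your-space.hf.space/run/predict",  # hypothetical URL
#       json={"data": ["Text to summarize goes here."]},
#   )
#   print(response.json()["data"][0])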