"""Minimal Gradio demo that serves text completions from appvoid/text-arco."""

import os
from threading import Thread
from typing import Iterator

import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# NOTE(review): os, Thread, Iterator, torch, AutoModelForCausalLM and
# AutoTokenizer are not referenced anywhere below — likely leftovers from an
# earlier streaming implementation; candidates for removal.

# Build the generation pipeline once at import time so every request reuses
# the loaded weights instead of re-downloading/re-initializing per call.
model = pipeline("text-generation", model="appvoid/text-arco")


@spaces.GPU
def predict(prompt):
    """Return the model's continuation of *prompt* (prompt included).

    Generates up to 64 new tokens with sampling at temperature 0.3.
    Note: the pipeline's ``generated_text`` field contains the original
    prompt followed by the completion.

    Args:
        prompt: The input text to continue.

    Returns:
        str: prompt + generated continuation.
    """
    completion = model(
        prompt,
        max_new_tokens=64,
        # do_sample=True is required for `temperature` to take effect;
        # without it the pipeline decodes greedily and ignores temperature.
        do_sample=True,
        temperature=0.3,
    )[0]["generated_text"]
    return completion


# Launch the web UI immediately when the script runs (Spaces entry point).
gr.Interface(
    fn=predict,
    inputs="text",
    outputs="text",
    title="text arco",
).launch()