"""Gradio demo that serves a fine-tuned, quantized CodeT5 checkpoint."""

import gradio as gr
from transformers import RobertaTokenizer, T5ForConditionalGeneration

# Single source of truth for the Hub repository, so the model and the
# tokenizer can never be loaded from two different checkpoints by accident.
MODEL_ID = "AbdulHadi806/codet5-finetuned-latest-quantized"

# Load the quantized model and tokenizer from Hugging Face Hub
quantized_model = T5ForConditionalGeneration.from_pretrained(MODEL_ID)
tokenizer = RobertaTokenizer.from_pretrained(MODEL_ID)


def inference(input_text: str) -> str:
    """Generate text for *input_text* with the quantized model.

    The input is tokenized into PyTorch tensors, passed to
    ``quantized_model.generate`` with its default generation settings, and
    the first returned sequence is decoded with special tokens stripped.

    Args:
        input_text: Raw text entered in the Gradio text box.

    Returns:
        The decoded model output, or an empty string when *input_text* is
        empty, ``None``, or whitespace-only.
    """
    # Nothing meaningful to generate from a blank prompt; skip the model call.
    if not input_text or not input_text.strip():
        return ""

    inputs = tokenizer(input_text, return_tensors="pt")
    outputs = quantized_model.generate(**inputs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# Create Gradio interface
iface = gr.Interface(fn=inference, inputs="text", outputs="text")

# Launch the interface
iface.launch()