import gradio as gr
from transformers import T5ForConditionalGeneration, AutoTokenizer
import torch
# Load the fine-tuned model and tokenizer from the Hugging Face Hub
quantized_model = T5ForConditionalGeneration.from_pretrained("AbdulHadi806/codeT5-finetuned-LaTexToPythonCode-30kDataset")
tokenizer = AutoTokenizer.from_pretrained("AbdulHadi806/codeT5-finetuned-LaTexToPythonCode-30kDataset")
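
# Optional sketch (an assumption, not required for this Space): the model runs
# on CPU by default; if a GPU is available it could be moved there, along with
# the input tensors produced below.
# device = "cuda" if torch.cuda.is_available() else "cpu"
# quantized_model.to(device)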

def preprocess_infer_input(text):
    # The Gradio input is already a plain string, so just prepend the "latex:" task prefix
    return f"latex: {text}"

def postprocess_output(text):
    # The model emits '<newline>' placeholders; convert them back into real line breaks
    return text.replace('<newline>', '\n')
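
# For example, postprocess_output('x = 1<newline>y = 2') returns 'x = 1\ny = 2'.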

def clean_generated_code(generated_code):
    print(':::generated_code::::', generated_code)  # debug output
    # Strip the stray task marker the model sometimes emits, then trim whitespace
    cleaned_code = generated_code.replace('*convert(latex, python.code)', '').strip()
    return cleaned_code
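
# For example, clean_generated_code('*convert(latex, python.code) x = a + b')
# returns 'x = a + b'.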

def generate_solution(input_text):
    input_text = preprocess_infer_input(input_text)
    print(input_text)  # debug: show the prefixed prompt
    input_ids = tokenizer(input_text, return_tensors='pt', padding="max_length", truncation=True, max_length=128).input_ids
    with torch.no_grad():
        outputs = quantized_model.generate(input_ids, max_length=128, num_beams=4, early_stopping=True)
    predicted_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    cleaned_code = clean_generated_code(postprocess_output(predicted_text))
    return cleaned_code
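
# Optional sanity check before launching the UI; the LaTeX below is only an
# illustrative input.
# print(generate_solution(r"\frac{a + b}{2}"))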
# Create Gradio interface
iface = gr.Interface(fn=generate_solution, inputs="text", outputs="text")
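# Optional sketch: gr.Interface also accepts title= and description= kwargs if
# a friendlier UI is wanted, e.g.:
# iface = gr.Interface(fn=generate_solution, inputs="text", outputs="text",
#                      title="LaTeX to Python",
#                      description="CodeT5 fine-tuned on a 30k LaTeX-to-Python dataset")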
# Launch the interface
iface.launch()