import gradio as gr
from transformers import T5ForConditionalGeneration, AutoTokenizer
import torch

# Load the quantized model and tokenizer from the Hugging Face Hub
quantized_model = T5ForConditionalGeneration.from_pretrained("AbdulHadi806/codeT5-finetuned-LaTexToPythonCode-30kDataset")
tokenizer = AutoTokenizer.from_pretrained("AbdulHadi806/codeT5-finetuned-LaTexToPythonCode-30kDataset")
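
# Optionally run on a GPU when one is available; the app also works on CPU,
# just more slowly.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
quantized_model = quantized_model.to(device)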
    
def preprocess_infer_input(text):
    # Gradio passes the textbox contents as a plain string; prepend the
    # 'latex:' task prefix the model expects.
    return f"latex: {text}"

def postprocess_output(text):
    # The model emits the literal token '<newline>' for line breaks; restore them.
    return text.replace('<newline>', '\n')
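
# For illustration:
#   postprocess_output('x = 1<newline>y = 2')  ->  'x = 1\ny = 2'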

def clean_generated_code(generated_code):
    # Strip the instruction marker the model sometimes echoes into its output
    cleaned_code = generated_code.replace('*convert(latex, python.code)', '').strip()

    # Collapse double spaces for readability. Note that this also shrinks
    # indentation in the generated Python; drop it if exact indentation matters.
    cleaned_code = cleaned_code.replace('  ', ' ')

    return cleaned_code

def generate_solution(input_text):
    input_text = preprocess_infer_input(input_text)

    # Tokenize to a fixed 128-token window (padded and truncated)
    input_ids = tokenizer(
        input_text,
        return_tensors='pt',
        padding="max_length",
        truncation=True,
        max_length=128,
    ).input_ids.to(quantized_model.device)

    # Beam search with early stopping tends to give cleaner code than greedy decoding
    with torch.no_grad():
        outputs = quantized_model.generate(input_ids, max_length=128, num_beams=4, early_stopping=True)

    predicted_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return clean_generated_code(postprocess_output(predicted_text))
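
# Optional smoke test from the command line (hypothetical LaTeX input):
# print(generate_solution(r"\sum_{i=1}^{n} i"))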

# Create Gradio interface
iface = gr.Interface(fn=generate_solution, inputs="text", outputs="text")

# Launch the interface
iface.launch()
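
# Note: launch() serves locally by default; Gradio also accepts share=True
# (iface.launch(share=True)) to expose a temporary public URL.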