# app.py
"""Gradio app: transcribe a handwritten math problem and solve it with SymPy."""

import re

import gradio as gr
from PIL import Image
from sympy import Eq, solve, symbols, sympify
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

MODEL_NAME = "nlpai-lab/mathocr-htr-base"

# Load the math OCR model and processor once, at import time, so that every
# request handled by the interface reuses the same instances.
processor = TrOCRProcessor.from_pretrained(MODEL_NAME)
model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME)

# Single-character typographic symbols mapped to text SymPy can parse.
# Fractions are parenthesised so that operator precedence is kept when they
# follow a division or a power (e.g. "1÷½" must become "1/(1/2)", not "1/1/2").
_SYMBOL_TABLE = str.maketrans({
    "×": "*",
    "÷": "/",
    "−": "-",
    "²": "**2",
    "³": "**3",
    "½": "(1/2)",
    "¼": "(1/4)",
    "…": "...",  # Ellipsis
})

# A radical sign followed directly by a number or a plain identifier
# (e.g. "√9", "√x").  Without parentheses the plain replacement would yield
# "sqrt9", which SymPy parses as a symbol named "sqrt9" instead of a call.
# Identifiers followed by "(" are left alone so "√(…)" style input and
# function-like names are handled by the plain replacement below.
_BARE_RADICAND = re.compile(
    r"√\s*(\d+(?:\.\d+)?|[A-Za-z_]\w*(?![\w(]))",
    re.ASCII,
)


def _normalize_transcription(text: str) -> str:
    """Rewrite typographic math symbols in *text* into SymPy-parsable ASCII."""
    text = text.translate(_SYMBOL_TABLE)
    text = _BARE_RADICAND.sub(r"sqrt(\1)", text)
    # Any remaining radical sign (typically one already followed by "(").
    return text.replace("√", "sqrt")


def _solve_transcription(transcription: str) -> str:
    """Solve or evaluate *transcription* and return a human-readable result.

    A string containing '=' is treated as an equation and solved for one of
    its free symbols; anything else is evaluated as an expression.  Any
    parsing or solving failure is reported as a message rather than raised.
    """
    # NOTE(security): SymPy documents that sympify() uses eval() and should
    # not be fed unsanitised input.  The text here comes from OCR of a
    # user-supplied image, so it is untrusted; consider restricting the
    # accepted character set before deploying this publicly.
    try:
        if "=" in transcription:
            lhs, rhs = transcription.split("=", 1)
            equation = Eq(sympify(lhs.strip()), sympify(rhs.strip()))
            variables = equation.free_symbols
            if not variables:
                return "No variables found in equation"
            # free_symbols is a set; pick the alphabetically first symbol so
            # the same equation is always solved for the same variable.
            variable = min(variables, key=str)
            return f"{variable} = {solve(equation, variable)}"
        # Treat as an arithmetic expression.
        return f"Result: {sympify(transcription)}"
    except Exception:
        # Deliberately broad: the parser can raise many different error
        # types on malformed OCR output.  Unlike a bare ``except:`` this
        # lets KeyboardInterrupt and SystemExit propagate.
        return "Invalid or unsolvable expression"


def predict_math_problem(image):
    """Transcribe a handwritten math problem and attempt to solve it.

    Args:
        image: The uploaded picture as a PIL image, or ``None`` when nothing
            was uploaded.

    Returns:
        A ``(transcription, solution)`` pair of strings.  If processing
        fails, the first element is an ``"Error: ..."`` message and the
        second is ``"Failed to process"``; the function does not raise.
    """
    if image is None:
        return "Error: no image provided", "Failed to process"

    try:
        # Transcribe the handwritten math problem.
        pixel_values = processor(
            image.convert("RGB"), return_tensors="pt"
        ).pixel_values
        generated_ids = model.generate(pixel_values)
        raw_text = processor.batch_decode(
            generated_ids, skip_special_tokens=True
        )[0]

        # Standardize mathematical symbols, then attempt to solve.
        transcription = _normalize_transcription(raw_text)
        return transcription, _solve_transcription(transcription)
    except Exception as e:
        return f"Error: {str(e)}", "Failed to process"


# Create Gradio interface
demo = gr.Interface(
    fn=predict_math_problem,
    inputs=gr.Image(type="pil", label="Upload Handwritten Math Problem"),
    outputs=[
        gr.Textbox(label="Transcribed Text"),
        gr.Textbox(label="Solution"),
    ],
    title="Handwritten Math Solver",
    description="Upload a handwritten math problem to get its transcription and solution.",
)

if __name__ == "__main__":
    demo.launch()