import gradio as gr
import logging
import asyncio
import os
from math import atan2, degrees

import cv2
import numpy as np
from PIL import Image, ImageDraw
from pyppeteer import launch
from roboflow import Roboflow

# Configure logging
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("debug.log"),
        logging.StreamHandler()
    ]
)

# Roboflow and model configuration
ROBOFLOW_API_KEY = "KUP9w62eUcD5PrrRMJsV"  # Replace with your API key
PROJECT_NAME = "model_verification_project"
VERSION_NUMBER = 2
# FONT_PATH is no longer used since we generate handwriting via Calligraphr
# FONT_PATH = "./STEVEHANDWRITING-REGULAR.TTF"


# ----------------------------
# Pyppeteer: Generate handwriting image via Calligraphr
# ----------------------------
async def generate_handwriting_text_image(text_prompt, screenshot_path):
    browser = await launch(headless=True, args=['--no-sandbox', '--disable-setuid-sandbox'])
    page = await browser.newPage()
    # Navigate to Calligraphr (adjust URL if needed)
    await page.goto('https://www.calligraphr.com/en/font/', {'waitUntil': 'networkidle2'})
    # Wait for the text input to be available and type the text
    await page.waitForSelector('#text-input')
    await page.type('#text-input', text_prompt)
    # Wait for the page to render the handwriting preview
    await asyncio.sleep(2)
    # Take a screenshot of the area containing the rendered handwriting text.
    # (Adjust the clip values if needed to capture the correct area.)
    await page.screenshot({
        'path': screenshot_path,
        'clip': {'x': 100, 'y': 200, 'width': 600, 'height': 150}
    })
    await browser.close()
    logging.debug(f"Calligraphr screenshot saved at {screenshot_path}")
    return screenshot_path


# ----------------------------
# Helper: Detect paper angle within bounding box
# ----------------------------
def detect_paper_angle(image, bounding_box):
    # Clamp coordinates at zero so negative values (possible after padding)
    # cannot wrap around when slicing
    x1, y1, x2, y2 = (max(0, v) for v in bounding_box)
    # Crop the region of interest (ROI) based on the bounding box
    roi = np.asarray(image)[y1:y2, x1:x2]
    # Convert ROI to grayscale; the image from Gradio is RGB, so only use
    # the RGBA conversion when an alpha channel is actually present
    if roi.shape[-1] == 4:
        gray = cv2.cvtColor(roi, cv2.COLOR_RGBA2GRAY)
    else:
        gray = cv2.cvtColor(roi, cv2.COLOR_RGB2GRAY)
    # Apply edge detection
    edges = cv2.Canny(gray, 50, 150)
    # Detect lines using the Hough line transform
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=100,
                            minLineLength=50, maxLineGap=10)
    if lines is not None:
        # Find the longest line (most prominent edge)
        longest_line = max(
            lines,
            key=lambda line: np.linalg.norm((line[0][2] - line[0][0],
                                             line[0][3] - line[0][1]))
        )
        x1_line, y1_line, x2_line, y2_line = longest_line[0]
        # Calculate the angle of the line relative to the horizontal axis
        dx = x2_line - x1_line
        dy = y2_line - y1_line
        angle = degrees(atan2(dy, dx))
        return angle  # Angle of the paper
    else:
        return 0  # Default to no rotation if no lines are found
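
# ----------------------------
# Optional fallback: render the text locally with PIL instead of scraping
# Calligraphr. This is a minimal sketch, not part of the original pipeline:
# the default font path reuses the STEVEHANDWRITING-REGULAR.TTF file mentioned
# in the commented-out FONT_PATH above, which may not exist on your system.
# process_image() below does not call it automatically; it only skips the
# detection on failure.
# ----------------------------
def generate_handwriting_fallback(text_prompt, output_path,
                                  font_path="./STEVEHANDWRITING-REGULAR.TTF",
                                  font_size=48):
    from PIL import ImageFont
    font = ImageFont.truetype(font_path, font_size)
    # Measure the rendered text so the canvas fits it with a small margin
    scratch = ImageDraw.Draw(Image.new("RGBA", (1, 1)))
    left, top, right, bottom = scratch.textbbox((0, 0), text_prompt, font=font)
    img = Image.new("RGBA", (right - left + 20, bottom - top + 20), (255, 255, 255, 0))
    ImageDraw.Draw(img).text((10 - left, 10 - top), text_prompt,
                             font=font, fill=(0, 0, 0, 255))
    img.save(output_path)
    logging.debug(f"Fallback handwriting image saved at {output_path}")
    return output_path
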
# ----------------------------
# Main processing function
# ----------------------------
def process_image(image, text):
    try:
        # Initialize Roboflow
        rf = Roboflow(api_key=ROBOFLOW_API_KEY)
        logging.debug("Initialized Roboflow API.")
        project = rf.workspace().project(PROJECT_NAME)
        logging.debug("Accessed project in Roboflow.")
        model = project.version(VERSION_NUMBER).model
        logging.debug("Loaded model from Roboflow.")

        # Save input image temporarily
        input_image_path = "/tmp/input_image.jpg"
        image.save(input_image_path)
        logging.debug(f"Input image saved to {input_image_path}.")

        # Perform inference
        logging.debug("Performing inference on the image...")
        prediction = model.predict(input_image_path, confidence=70, overlap=50).json()
        logging.debug(f"Inference result: {prediction}")

        # Open the image for processing
        pil_image = image.convert("RGBA")
        logging.debug("Converted image to RGBA mode.")

        # Iterate over detected objects (assumed white papers).
        # Roboflow reports each box as a center point (x, y) plus width/height.
        for obj in prediction['predictions']:
            # Use white paper dimensions from the prediction
            white_paper_width = obj['width']
            white_paper_height = obj['height']

            # Set padding (adjust percentages as needed); cast to int because
            # PIL's resize() requires integer pixel dimensions
            padding_x = int(white_paper_width * 0.1)
            padding_y = int(white_paper_height * 0.1)
            box_width = int(white_paper_width - 2 * padding_x)
            box_height = int(white_paper_height - 2 * padding_y)
            logging.debug(f"Padded white paper dimensions: width={box_width}, height={box_height}.")

            # Calculate padded corner coordinates from the box center
            x1_padded = int(obj['x'] - white_paper_width / 2 + padding_x)
            y1_padded = int(obj['y'] - white_paper_height / 2 + padding_y)
            x2_padded = int(obj['x'] + white_paper_width / 2 - padding_x)
            y2_padded = int(obj['y'] + white_paper_height / 2 - padding_y)

            # Detect paper angle
            angle = detect_paper_angle(np.array(image),
                                       (x1_padded, y1_padded, x2_padded, y2_padded))
            logging.debug(f"Detected paper angle: {angle} degrees.")

            # For debugging: draw the bounding box (optional)
            debug_layer = pil_image.copy()
            debug_draw = ImageDraw.Draw(debug_layer)
            debug_draw.rectangle([(x1_padded, y1_padded), (x2_padded, y2_padded)],
                                 outline="red", width=3)
            debug_layer.save("/tmp/debug_bounding_box.png")
            logging.debug("Saved bounding box debug image to /tmp/debug_bounding_box.png.")

            # --------------------------------------------
            # New: Generate handwriting image via Calligraphr
            # --------------------------------------------
            handwriting_path = "/tmp/handwriting.png"
            try:
                # Run the async Pyppeteer function to generate handwriting
                handwriting_path = asyncio.run(
                    generate_handwriting_text_image(text, handwriting_path))
            except Exception as e:
                logging.error(f"Error generating handwriting image: {e}")
                # Optionally, fall back to another method here
                # (e.g., the generate_handwriting_fallback sketch above)
                continue

            # Open the generated handwriting image
            handwriting_img = Image.open(handwriting_path).convert("RGBA")

            # Resize handwriting image to fit the white paper box.
            # (Image.ANTIALIAS was removed in Pillow 10; LANCZOS is its replacement.)
            handwriting_img = handwriting_img.resize((box_width, box_height), Image.LANCZOS)

            # Rotate the handwriting image to align with the detected paper angle
            rotated_handwriting = handwriting_img.rotate(-angle, resample=Image.BICUBIC,
                                                         expand=True)

            # Composite the rotated handwriting image onto a transparent layer,
            # then overlay it on the original image
            text_layer = Image.new("RGBA", pil_image.size, (255, 255, 255, 0))
            paste_x = int(obj['x'] - rotated_handwriting.size[0] / 2)
            paste_y = int(obj['y'] - rotated_handwriting.size[1] / 2)
            text_layer.paste(rotated_handwriting, (paste_x, paste_y), rotated_handwriting)
            pil_image = Image.alpha_composite(pil_image, text_layer)
            logging.debug("Handwriting layer composited onto the original image.")

        # Save and return output image path
        output_image_path = "/tmp/output_image.png"
        pil_image.convert("RGB").save(output_image_path)
        logging.debug(f"Output image saved to {output_image_path}.")
        return output_image_path
    except Exception as e:
        logging.error(f"Error during image processing: {e}")
        return None


# ----------------------------
# Gradio interface function
# ----------------------------
def gradio_inference(image, text):
    logging.debug("Starting Gradio inference.")
    result_path = process_image(image, text)
    if result_path:
        logging.debug("Gradio inference successful.")
        return result_path, result_path, "Processing complete! Download the image below."
    logging.error("Gradio inference failed.")
    return None, None, "An error occurred while processing the image. Please check the logs."
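
# For quick debugging without the Gradio UI, a minimal sketch (the sample image
# path and text here are illustrative, not files shipped with this project):
#
#     from PIL import Image
#     out = process_image(Image.open("/tmp/sample.jpg"), "hello world")
#     print(out)  # "/tmp/output_image.png" on success, None on failure
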
# ----------------------------
# Gradio interface definition
# ----------------------------
interface = gr.Interface(
    fn=gradio_inference,
    inputs=[
        gr.Image(type="pil", label="Upload an Image"),
        gr.Textbox(label="Enter Text to Overlay")
    ],
    outputs=[
        gr.Image(label="Processed Image Preview"),   # Preview processed image
        gr.File(label="Download Processed Image"),   # Download the image
        gr.Textbox(label="Status")                   # Status message
    ],
    title="Roboflow Detection with Handwriting Overlay",
    description=(
        "Upload an image and enter text to overlay. The Roboflow model detects "
        "the white paper area, a handwriting image is generated via Calligraphr "
        "using Pyppeteer, and the output image is composited accordingly."
    ),
    allow_flagging="never"
)

# Launch the Gradio app
if __name__ == "__main__":
    logging.debug("Launching Gradio interface.")
    interface.launch(share=True)
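
# Deployment note (an assumption about your environment, not something this
# script configures): on its first launch pyppeteer downloads a matching
# Chromium build. In containers it can be more reliable to point it at a
# preinstalled browser via the executablePath launch option, e.g.:
#
#     browser = await launch(headless=True,
#                            executablePath="/usr/bin/chromium-browser",  # hypothetical path
#                            args=['--no-sandbox', '--disable-setuid-sandbox'])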