# Hugging Face Space: ugolefoo/bookscanner_app
# Status at capture time: Runtime error
# File size: 10,133 Bytes

import os
import tempfile
import uuid

import cv2
import gradio as gr
import numpy as np
import pandas as pd
import pytesseract
import requests

# ──────────────────────────────────────────────────────────────
# 1. Utility: Detect rectangular contours (approximate book covers)
# ──────────────────────────────────────────────────────────────
def detect_book_regions(
    image: np.ndarray,
    min_area=5000,
    eps_coef=0.02,
    aspect_ranges=((0.4, 0.9), (1.0, 1.6)),
):
    """
    Detect rectangular regions in an image that likely correspond to book covers.

    Pipeline: grayscale -> Gaussian blur -> Canny edges -> morphological close
    -> external contours -> keep 4-vertex polygons whose bounding box has a
    book-like aspect ratio.

    Args:
        image: BGR image (H x W x 3) or an already-grayscale 2-D array.
        min_area: Contours with a smaller area (in pixels) are ignored.
        eps_coef: Fraction of the contour perimeter used as the
            polygon-approximation tolerance.
        aspect_ranges: Iterable of (low, high) pairs; a box is kept when its
            width/height ratio lies strictly inside any of them. The defaults
            cover portrait (0.4-0.9) and landscape (1.0-1.6) covers.

    Returns:
        A list of bounding boxes (x, y, w, h), sorted top-to-bottom and then
        left-to-right. Empty when the image is None/empty or nothing matches.
    """
    # Nothing to scan: avoid handing an empty buffer to OpenCV (it raises).
    if image is None or image.size == 0:
        return []

    # Accept an already-grayscale image as-is; otherwise convert from BGR.
    if image.ndim == 2:
        gray = image
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    edges = cv2.Canny(blurred, 50, 150)

    # Dilate + erode to close gaps
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    closed = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel)

    # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x but
    # (image, contours, hierarchy) on 3.x; the contour list is always the
    # second-to-last element.
    found = cv2.findContours(
        closed.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    contours = found[-2]

    boxes = []
    for cnt in contours:
        if cv2.contourArea(cnt) < min_area:
            continue

        peri = cv2.arcLength(cnt, True)
        approx = cv2.approxPolyDP(cnt, eps_coef * peri, True)

        # Keep only quadrilaterals
        if len(approx) != 4:
            continue

        x, y, w, h = cv2.boundingRect(approx)
        ar = w / float(h)
        # Filter by typical book-cover aspect ratios
        # (loosen/tighten via the aspect_ranges argument if needed)
        if any(low < ar < high for low, high in aspect_ranges):
            boxes.append((x, y, w, h))

    # Sort top→bottom (by y), breaking ties left→right (by x)
    boxes.sort(key=lambda b: (b[1], b[0]))
    return boxes

# ──────────────────────────────────────────────────────────────
# 2. OCR on a cropped region
# ──────────────────────────────────────────────────────────────
def ocr_on_region(image: np.ndarray, box: tuple):
    """
    Crop the image to the given box and run Tesseract OCR.

    Args:
        image: BGR image (H x W x 3).
        box: (x, y, w, h) bounding box in pixel coordinates.

    Returns:
        The OCR text with surrounding whitespace stripped, or "" when the box
        does not overlap the image at all.
    """
    x, y, w, h = box
    img_h, img_w = image.shape[:2]

    # Clamp the box to the image. Without this, negative coordinates would be
    # interpreted by numpy slicing as "from the end" and silently crop the
    # wrong region, and a box outside the image would yield an empty crop
    # that OpenCV refuses to convert.
    x0, y0 = max(x, 0), max(y, 0)
    x1, y1 = min(x + w, img_w), min(y + h, img_h)
    if x1 <= x0 or y1 <= y0:
        return ""

    cropped = image[y0:y1, x0:x1]
    gray_crop = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
    # Otsu picks the binarization threshold automatically (the 0 is ignored).
    _, thresh_crop = cv2.threshold(
        gray_crop, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    custom_config = r"--oem 3 --psm 6"
    text = pytesseract.image_to_string(thresh_crop, config=custom_config)
    return text.strip()

# ──────────────────────────────────────────────────────────────
# 3. OCR on the full image (fallback)
# ──────────────────────────────────────────────────────────────
def ocr_full_image(image: np.ndarray):
    """
    OCR the whole image; used as the fallback when no covers were detected.

    The image is converted from BGR to grayscale, binarized with Otsu's
    threshold, and passed to Tesseract. Returns the recognized text with
    leading/trailing whitespace removed.
    """
    tesseract_args = r"--oem 3 --psm 6"

    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns (threshold_used, image); only the image is needed.
    binarized = cv2.threshold(
        grayscale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )[1]

    raw_text = pytesseract.image_to_string(binarized, config=tesseract_args)
    return raw_text.strip()

# ──────────────────────────────────────────────────────────────
# 4. Query OpenLibrary API
# ──────────────────────────────────────────────────────────────
def query_openlibrary(title_text: str, author_text: str = None):
    """
    Search OpenLibrary by title (and optional author).

    Args:
        title_text: Title to search for. Empty/whitespace-only titles are not
            sent to the API.
        author_text: Optional author filter; ignored when empty.

    Returns:
        A dict with keys "title", "author_name", "publisher" and
        "first_publish_year" built from the first search hit, or None when
        there is no title, no hit, or the request/response fails. Failures
        are reported on stdout and never raised (best-effort lookup).
    """
    title = (title_text or "").strip()
    if not title:
        # An empty query cannot produce a meaningful match; skip the round trip.
        return None

    base_url = "https://openlibrary.org/search.json"
    params = {
        "title": title,
        # Ask explicitly for the fields read below so the result does not
        # depend on the endpoint's default field set, and fetch a single
        # document because only the first hit is used.
        "fields": "title,author_name,publisher,first_publish_year",
        "limit": 1,
    }
    author = (author_text or "").strip()
    if author:
        params["author"] = author

    try:
        resp = requests.get(base_url, params=params, timeout=5)
        resp.raise_for_status()
        data = resp.json()
    except (requests.RequestException, ValueError) as e:
        # Network/HTTP errors and undecodable JSON bodies.
        print(f"OpenLibrary query failed: {e}")
        return None

    docs = data.get("docs") if isinstance(data, dict) else None
    if not docs or not isinstance(docs[0], dict):
        return None

    doc = docs[0]
    # `or []` also covers fields that are present but null.
    return {
        "title": doc.get("title", ""),
        "author_name": ", ".join(map(str, doc.get("author_name") or [])),
        "publisher": ", ".join(map(str, doc.get("publisher") or [])),
        "first_publish_year": doc.get("first_publish_year", ""),
    }

# ──────────────────────────────────────────────────────────────
# 5. Process one uploaded image
# ──────────────────────────────────────────────────────────────
def _to_bgr(image_file):
    """Convert a PIL image or numpy array to a 3-channel BGR array (None if there is no image)."""
    if image_file is None:
        return None

    # PIL images expose .convert(); normalizing to RGB handles grayscale,
    # palette and RGBA modes in one step.
    convert = getattr(image_file, "convert", None)
    if callable(convert):
        image_file = convert("RGB")

    arr = np.asarray(image_file)
    if arr.size == 0:
        return None

    if arr.ndim == 2:
        # Grayscale array: replicate the single channel.
        arr = np.stack([arr] * 3, axis=-1)
    elif arr.ndim == 3 and arr.shape[2] == 1:
        arr = np.repeat(arr, 3, axis=2)
    elif arr.ndim == 3 and arr.shape[2] == 4:
        # RGBA array: drop alpha so the channel reversal yields BGR, not ABGR.
        arr = arr[:, :, :3]
    elif arr.ndim != 3 or arr.shape[2] != 3:
        raise ValueError(f"Unsupported image shape: {arr.shape}")

    # RGB -> BGR; copy so OpenCV receives a contiguous buffer.
    return arr[:, :, ::-1].copy()


def _record_from_text(ocr_text):
    """Turn OCR text into one result row (OpenLibrary match, else the raw guess); None if no text."""
    lines = [line.strip() for line in ocr_text.splitlines() if line.strip()]
    if not lines:
        return None

    # Heuristic: first non-empty line is the title, second (if any) the author.
    title_guess = lines[0]
    author_guess = lines[1] if len(lines) > 1 else None

    meta = query_openlibrary(title_guess, author_guess)
    if meta:
        return meta

    # No OpenLibrary match → still include OCR result
    return {
        "title": title_guess,
        "author_name": author_guess or "",
        "publisher": "",
        "first_publish_year": "",
    }


def process_image(image_file):
    """
    Run the full pipeline on one uploaded image.

    Gradio passes a PIL image or numpy array (or None when nothing was
    uploaded). The image is converted to OpenCV BGR, then:
    detect covers → OCR each cover → OpenLibrary lookup.
    If no covers are found, fall back to OCR on the full image once.

    Returns:
        (DataFrame, filepath): the result table with columns title,
        author_name, publisher, first_publish_year, and the path of a CSV
        copy written to the system temp directory. With no image or no
        recognized text the table is empty and the CSV holds only the header.
    """
    columns = ["title", "author_name", "publisher", "first_publish_year"]
    records = []

    img = _to_bgr(image_file)
    if img is not None:
        # 1) Try to detect individual covers
        boxes = detect_book_regions(img)
        if boxes:
            # Lazy generator: OCR and lookup stay interleaved per cover.
            texts = (ocr_on_region(img, box) for box in boxes)
        else:
            # 2) FALLBACK: no boxes detected → OCR on full image once
            texts = (ocr_full_image(img) for _ in range(1))

        for text in texts:
            record = _record_from_text(text)
            if record is not None:
                records.append(record)

    # Build DataFrame (even if empty)
    df = pd.DataFrame(records, columns=columns)
    csv_bytes = df.to_csv(index=False).encode("utf-8")

    # Write CSV to a unique file in the platform's temp directory
    unique_name = f"books_{uuid.uuid4().hex}.csv"
    temp_path = os.path.join(tempfile.gettempdir(), unique_name)
    with open(temp_path, "wb") as f:
        f.write(csv_bytes)

    return df, temp_path

# ──────────────────────────────────────────────────────────────
# 6. Build the Gradio Interface
# ──────────────────────────────────────────────────────────────
def build_interface():
    """
    Build and return the Gradio Blocks app (not yet launched).

    Layout: an instructions panel, an image upload next to a "Scan & Lookup"
    button, a results table, and a CSV download. Clicking the button runs
    process_image on the uploaded image and fills the table and the file.
    """
    with gr.Blocks(title="Book Cover Scanner") as demo:
        gr.Markdown(
            """
            ## Book Cover Scanner + Metadata Lookup

            1. Upload a photo containing one or multiple book covers  
            2. The app will:
               - Detect individual covers (rectangles).  
               - If any are found, OCR each one and query OpenLibrary for metadata.  
               - If **no** rectangles are detected, OCR the **entire image** once.  
            3. Display all detected/guessed books in a table.  
            4. Download a CSV of the results.  

            **Tips:**  
            - For best cover detection: use a flat, well-lit photo with minimal glare/obstructions.  
            - You can also place each cover on a plain background (e.g., a white tabletop).  
            """
        )

        with gr.Row():
            img_in = gr.Image(type="pil", label="Upload Image of Book Covers")
            run_button = gr.Button("Scan & Lookup")

        output_table = gr.Dataframe(
            headers=["title", "author_name", "publisher", "first_publish_year"],
            label="Detected Books + Metadata",
            # `type` selects the Python-side value format (a pandas DataFrame);
            # `datatype` describes per-cell types ("str", "number", ...) and
            # does not accept "pandas".
            type="pandas",
        )
        download_file = gr.File(label="Download CSV")

        def on_run(image):
            # Thin wrapper kept so the event handler's name stays `on_run`.
            df, filepath = process_image(image)
            return df, filepath

        run_button.click(
            fn=on_run,
            inputs=[img_in],
            outputs=[output_table, download_file],
        )

    return demo

# Script entry point: build the UI and start the Gradio server only when this
# file is executed directly, not when it is imported.
if __name__ == "__main__":
    demo_app = build_interface()
    demo_app.launch()