Spaces:
Runtime error
Runtime error
import cv2 | |
import numpy as np | |
import pytesseract | |
import requests | |
import pandas as pd | |
import gradio as gr | |
import uuid | |
import os | |
# ──────────────────────────────────────────────────────────────
# 1. Utility: Detect rectangular contours (approximate book covers)
# ──────────────────────────────────────────────────────────────
def detect_book_regions(image: np.ndarray, min_area=5000, eps_coef=0.02):
    """
    Find four-cornered outlines in a BGR image that could be book covers.

    Args:
        image: Image array that ``cv2.cvtColor`` can convert with
            ``COLOR_BGR2GRAY``.
        min_area: Contours whose area is below this value are ignored.
        eps_coef: Fraction of the contour perimeter used as the tolerance
            for ``cv2.approxPolyDP``.

    Returns:
        A list of bounding boxes ``(x, y, w, h)`` ordered by ``y`` first and
        ``x`` second (top-to-bottom, then left-to-right).
    """

    def _plausible_cover(ratio):
        # Width/height windows accepted as "book-like".
        # NOTE(review): ratios in [0.9, 1.0] fall between the two windows and
        # are rejected -- confirm that excluding near-square covers is intended.
        return 0.4 < ratio < 0.9 or 1.0 < ratio < 1.6

    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edge_map = cv2.Canny(cv2.GaussianBlur(grayscale, (5, 5), 0), 50, 150)

    # Morphological closing (dilate, then erode) bridges small gaps in edges.
    sealed = cv2.morphologyEx(
        edge_map,
        cv2.MORPH_CLOSE,
        cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5)),
    )
    found, _ = cv2.findContours(
        sealed.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    candidates = []
    for outline in found:
        if cv2.contourArea(outline) < min_area:
            continue
        tolerance = eps_coef * cv2.arcLength(outline, True)
        polygon = cv2.approxPolyDP(outline, tolerance, True)
        # Only quadrilaterals are considered.
        if len(polygon) != 4:
            continue
        left, top, width, height = cv2.boundingRect(polygon)
        if _plausible_cover(width / float(height)):
            candidates.append((left, top, width, height))

    # Order by top edge first, then by left edge.
    candidates.sort(key=lambda rect: (rect[1], rect[0]))
    return candidates
# ──────────────────────────────────────────────────────────────
# 2. OCR on a cropped region
# ──────────────────────────────────────────────────────────────
def ocr_on_region(image: np.ndarray, box: tuple):
    """
    Run Tesseract on one rectangular patch of a BGR image.

    Args:
        image: Source image, indexed as ``image[row, column]``.
        box: ``(x, y, w, h)`` rectangle to cut out of ``image``.

    Returns:
        The OCR text for the patch with surrounding whitespace stripped.
    """
    left, top, width, height = box
    patch = image[top : top + height, left : left + width]

    # Binarise with Otsu's automatically chosen threshold before OCR.
    patch_gray = cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY)
    binarized = cv2.threshold(
        patch_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )[1]

    raw_text = pytesseract.image_to_string(binarized, config=r"--oem 3 --psm 6")
    return raw_text.strip()
# ──────────────────────────────────────────────────────────────
# 3. OCR on the full image (fallback)
# ──────────────────────────────────────────────────────────────
def ocr_full_image(image: np.ndarray):
    """
    Run Tesseract over a whole BGR image.

    Used as the fallback when no individual covers were detected.

    Returns:
        The OCR text for the full image with surrounding whitespace stripped.
    """
    # Same preprocessing as the per-region OCR: grayscale, then Otsu binarise.
    binarized = cv2.threshold(
        cv2.cvtColor(image, cv2.COLOR_BGR2GRAY),
        0,
        255,
        cv2.THRESH_BINARY + cv2.THRESH_OTSU,
    )[1]
    tesseract_flags = r"--oem 3 --psm 6"
    recognized = pytesseract.image_to_string(binarized, config=tesseract_flags)
    return recognized.strip()
# ──────────────────────────────────────────────────────────────
# 4. Query OpenLibrary API
# ──────────────────────────────────────────────────────────────
def query_openlibrary(title_text: str, author_text: str = None):
    """
    Search OpenLibrary by title (and optionally author) and return the top hit.

    Args:
        title_text: Title to search for. A blank or missing title returns
            ``None`` without touching the network.
        author_text: Optional author filter; skipped when falsy.

    Returns:
        A dict with the keys ``title``, ``author_name``, ``publisher`` and
        ``first_publish_year`` built from the first search result, or ``None``
        when the title is blank, the request fails, or nothing matches.
        List-valued fields are joined with ``", "``.
    """
    # OCR output can be empty or whitespace only; there is nothing to look up.
    if not title_text or not title_text.strip():
        return None

    params = {
        "title": title_text.strip(),
        # Only the first document is read, so ask for a single result and
        # name the fields explicitly instead of relying on the API defaults.
        "limit": 1,
        "fields": "title,author_name,publisher,first_publish_year",
    }
    if author_text:
        params["author"] = author_text

    try:
        resp = requests.get(
            "https://openlibrary.org/search.json", params=params, timeout=5
        )
        resp.raise_for_status()
        data = resp.json()
    except (requests.RequestException, ValueError) as e:
        # Best-effort lookup: network/HTTP errors and malformed JSON are
        # reported and treated as "no match" so the caller can fall back.
        print(f"OpenLibrary query failed: {e}")
        return None

    docs = data.get("docs") if isinstance(data, dict) else None
    if not docs or not isinstance(docs[0], dict):
        return None
    doc = docs[0]

    def _joined(key):
        # Tolerate a missing or null field and non-string list entries.
        return ", ".join(str(item) for item in (doc.get(key) or []))

    return {
        "title": doc.get("title", ""),
        "author_name": _joined("author_name"),
        "publisher": _joined("publisher"),
        "first_publish_year": doc.get("first_publish_year", ""),
    }
# ──────────────────────────────────────────────────────────────
# 5. Process one uploaded image
# ──────────────────────────────────────────────────────────────
def process_image(image_file):
    """
    Turn one uploaded image into a table of book records plus a CSV file.

    Pipeline: detect cover rectangles -> OCR each one -> OpenLibrary lookup.
    If no rectangles are found, the whole image is OCR'd once instead.
    A missing image (``None``) or an empty one yields an empty table rather
    than an exception.

    Args:
        image_file: A PIL image or an RGB(-like) numpy array, or ``None``.

    Returns:
        ``(df, csv_path)`` where ``df`` is a DataFrame with the columns
        ``title``, ``author_name``, ``publisher``, ``first_publish_year`` and
        ``csv_path`` is the path of a freshly written temporary CSV file
        holding the same rows.
    """
    columns = ["title", "author_name", "publisher", "first_publish_year"]
    records = []

    img = _to_bgr(image_file)
    if img is not None:
        boxes = detect_book_regions(img)
        if boxes:
            # One OCR pass per detected cover.
            ocr_texts = (ocr_on_region(img, box) for box in boxes)
        else:
            # Fallback: no covers detected, OCR the full image once.
            ocr_texts = [ocr_full_image(img)]

        for ocr_text in ocr_texts:
            record = _record_from_ocr(ocr_text)
            if record is not None:
                records.append(record)

    # Build the DataFrame even when there are no records, so the UI and the
    # CSV always carry the expected header.
    df = pd.DataFrame(records, columns=columns)
    return df, _write_csv(df)


def _to_bgr(image_file):
    """Convert a PIL image / numpy array to a 3-channel BGR array, or None."""
    if image_file is None:
        return None
    # PIL images expose ``convert``; normalising to RGB covers grayscale,
    # palette and RGBA modes in one step.
    if hasattr(image_file, "convert"):
        image_file = image_file.convert("RGB")

    pixels = np.asarray(image_file)
    if pixels.size == 0 or pixels.ndim not in (2, 3):
        return None
    if pixels.ndim == 2:
        # Grayscale array: replicate the single channel.
        pixels = np.stack([pixels] * 3, axis=-1)
    elif pixels.shape[2] == 1:
        pixels = np.repeat(pixels, 3, axis=2)
    elif pixels.shape[2] == 4:
        # Drop the alpha channel.
        pixels = pixels[:, :, :3]

    # Reverse the channel order (RGB -> BGR) and make the result contiguous.
    return pixels[:, :, ::-1].copy()


def _record_from_ocr(ocr_text):
    """Guess title/author from OCR text and look them up; None if no text."""
    lines = [ln.strip() for ln in ocr_text.splitlines() if ln.strip()]
    if not lines:
        return None

    # First non-empty line is the title guess, second (if any) the author.
    title_guess = lines[0]
    author_guess = lines[1] if len(lines) > 1 else None

    meta = query_openlibrary(title_guess, author_guess)
    if meta:
        return meta
    # No OpenLibrary match — still include the OCR result.
    return {
        "title": title_guess,
        "author_name": author_guess or "",
        "publisher": "",
        "first_publish_year": "",
    }


def _write_csv(df):
    """Write ``df`` to a uniquely named temporary CSV and return its path."""
    import tempfile

    # delete=False: the file must outlive this function so it can be served.
    with tempfile.NamedTemporaryFile(
        mode="w",
        prefix="books_",
        suffix=".csv",
        delete=False,
        newline="",
        encoding="utf-8",
    ) as handle:
        df.to_csv(handle, index=False)
        csv_path = handle.name
    return csv_path
# ──────────────────────────────────────────────────────────────
# 6. Build the Gradio Interface
# ──────────────────────────────────────────────────────────────
def build_interface():
    """
    Assemble the Gradio Blocks UI for the book cover scanner.

    The layout has an instructions panel, an image upload with a
    "Scan & Lookup" button, a results table, and a CSV download slot.
    Clicking the button passes the uploaded image to ``process_image`` and
    shows its ``(DataFrame, csv_path)`` result in the table and file outputs.

    Returns:
        The ``gr.Blocks`` app; the caller is responsible for launching it.
    """
    with gr.Blocks(title="Book Cover Scanner") as demo:
        gr.Markdown(
            """
## Book Cover Scanner + Metadata Lookup
1. Upload a photo containing one or multiple book covers
2. The app will:
- Detect individual covers (rectangles).
- If any are found, OCR each one and query OpenLibrary for metadata.
- If **no** rectangles are detected, OCR the **entire image** once.
3. Display all detected/guessed books in a table.
4. Download a CSV of the results.
**Tips:**
- For best cover detection: use a flat, well-lit photo with minimal glare/obstructions.
- You can also place each cover on a plain background (e.g., a white tabletop).
"""
        )
        # NOTE(review): the nesting of the widgets below was reconstructed;
        # confirm that the button belongs in the same row as the image.
        with gr.Row():
            img_in = gr.Image(type="pil", label="Upload Image of Book Covers")
            run_button = gr.Button("Scan & Lookup")
        output_table = gr.Dataframe(
            headers=["title", "author_name", "publisher", "first_publish_year"],
            label="Detected Books + Metadata",
            # "pandas" is a value-format name, so it belongs to ``type``;
            # ``datatype`` describes per-column cell types and is left at
            # its default.
            type="pandas",
        )
        download_file = gr.File(label="Download CSV")

        # process_image already returns (DataFrame, filepath) in output order.
        run_button.click(
            fn=process_image,
            inputs=[img_in],
            outputs=[output_table, download_file],
        )
    return demo
if __name__ == "__main__":
    # Build the UI and start the Gradio server only when run as a script.
    build_interface().launch()