Spaces:

ugolefoo
/

bookscanner_app

Runtime error

App Files Files Community

bookscanner_app / app.py

ugolefoo

Update app.py

cece48d verified 3 months ago

raw

history blame

10.1 kB

	import os
	import tempfile
	import uuid

	import cv2
	import gradio as gr
	import numpy as np
	import pandas as pd
	import pytesseract
	import requests

	# ──────────────────────────────────────────────────────────────
	# 1. Utility: Detect rectangular contours (approximate book covers)
	# ──────────────────────────────────────────────────────────────
	def detect_book_regions(
	    image: np.ndarray,
	    min_area=5000,
	    eps_coef=0.02,
	    aspect_ranges=((0.4, 0.9), (1.0, 1.6)),
	):
	    """
	    Detect rectangular regions in an image that likely correspond to book covers.

	    Args:
	        image: Colour image in BGR channel order (it is converted with
	            ``cv2.COLOR_BGR2GRAY``).
	        min_area: Contours whose area is below this value are ignored.
	        eps_coef: Fraction of the contour perimeter used as the tolerance
	            when approximating the contour with a polygon.
	        aspect_ranges: Iterable of ``(low, high)`` pairs. A box is kept when
	            its width/height ratio lies strictly inside any pair. The
	            default keeps the original portrait and landscape windows,
	            which reject boxes with a ratio in [0.9, 1.0].

	    Returns:
	        A list of bounding boxes ``(x, y, w, h)``, sorted top→bottom and
	        then left→right.
	    """
	    aspect_ranges = tuple(aspect_ranges)

	    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
	    edges = cv2.Canny(blurred, 50, 150)

	    # Morphological close (dilate + erode) to bridge small gaps in the edges
	    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
	    closed = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel)

	    # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
	    # (image, contours, hierarchy) in 3.x; the contour list is always the
	    # second-to-last element.
	    contours = cv2.findContours(
	        closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
	    )[-2]

	    boxes = []
	    for cnt in contours:
	        if cv2.contourArea(cnt) < min_area:
	            continue

	        peri = cv2.arcLength(cnt, True)
	        approx = cv2.approxPolyDP(cnt, eps_coef * peri, True)

	        # Keep only quadrilaterals
	        if len(approx) != 4:
	            continue

	        x, y, w, h = cv2.boundingRect(approx)
	        ar = w / float(h)
	        # Filter by typical book-cover aspect ratios
	        # (loosen/tighten via the aspect_ranges argument if needed)
	        if any(low < ar < high for low, high in aspect_ranges):
	            boxes.append((x, y, w, h))

	    # Sort top→bottom (y), then left→right (x)
	    boxes.sort(key=lambda b: (b[1], b[0]))
	    return boxes

	# ──────────────────────────────────────────────────────────────
	# 2. OCR on a cropped region
	# ──────────────────────────────────────────────────────────────
	def ocr_on_region(image: np.ndarray, box: tuple):
	    """
	    Run Tesseract OCR on one rectangular region of a BGR image.

	    ``box`` is an ``(x, y, w, h)`` tuple. The region is cut out of
	    ``image``, converted to grayscale and binarised with Otsu's method
	    before being passed to Tesseract.

	    Returns the recognised text with leading/trailing whitespace removed.
	    """
	    left, top, width, height = box
	    region = image[top : top + height, left : left + width]

	    # With THRESH_OTSU the threshold is chosen automatically, so the 0
	    # passed as the threshold value is only a placeholder.
	    otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
	    binary_region = cv2.threshold(
	        cv2.cvtColor(region, cv2.COLOR_BGR2GRAY), 0, 255, otsu_flags
	    )[1]

	    recognized = pytesseract.image_to_string(
	        binary_region, config=r"--oem 3 --psm 6"
	    )
	    return recognized.strip()

	# ──────────────────────────────────────────────────────────────
	# 3. OCR on the full image (fallback)
	# ──────────────────────────────────────────────────────────────
	def ocr_full_image(image: np.ndarray):
	    """
	    Run OCR over the whole BGR image (the fallback when no covers are found).

	    The image is converted to grayscale and binarised with Otsu's method
	    before being passed to Tesseract.

	    Returns the recognised text as a string with leading/trailing
	    whitespace removed.
	    """
	    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

	    # Binarise the full frame the same way the per-region OCR does
	    otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
	    binarized = cv2.threshold(grayscale, 0, 255, otsu_flags)[1]

	    recognized = pytesseract.image_to_string(
	        binarized, config=r"--oem 3 --psm 6"
	    )
	    return recognized.strip()

	# ──────────────────────────────────────────────────────────────
	# 4. Query OpenLibrary API
	# ──────────────────────────────────────────────────────────────
	def query_openlibrary(title_text: str, author_text: str = None):
	    """
	    Search OpenLibrary by title (and optional author).

	    Args:
	        title_text: Title to search for. A blank or ``None`` title returns
	            ``None`` without making a network request.
	        author_text: Optional author filter; omitted from the query when
	            empty.

	    Returns:
	        A dict with the keys ``title``, ``author_name``, ``publisher`` and
	        ``first_publish_year`` taken from the first search hit, or ``None``
	        when there is no hit or the request fails. Lookup is best-effort:
	        failures are printed and never raised to the caller.
	    """
	    if not title_text or not title_text.strip():
	        return None

	    base_url = "https://openlibrary.org/search.json"
	    params = {
	        "title": title_text,
	        # Only the first hit is used, so ask for one document and only the
	        # fields read below instead of the default full result page.
	        "limit": 1,
	        "fields": "title,author_name,publisher,first_publish_year",
	    }
	    if author_text:
	        params["author"] = author_text

	    try:
	        resp = requests.get(base_url, params=params, timeout=5)
	        resp.raise_for_status()
	        docs = resp.json().get("docs") or []
	    except Exception as e:
	        # Deliberately broad: a failed lookup must not abort the scan.
	        print(f"OpenLibrary query failed: {e}")
	        return None

	    if not docs:
	        return None

	    doc = docs[0]
	    return {
	        "title": doc.get("title", ""),
	        # "or []" also covers a key that is present but null
	        "author_name": ", ".join(doc.get("author_name") or []),
	        "publisher": ", ".join(doc.get("publisher") or []),
	        "first_publish_year": doc.get("first_publish_year", ""),
	    }

	# ──────────────────────────────────────────────────────────────
	# 5. Process one uploaded image
	# ──────────────────────────────────────────────────────────────
	def _book_record_from_text(ocr_text):
	    """Build one result row from raw OCR text, or return None if it is blank."""
	    lines = [ln.strip() for ln in ocr_text.splitlines() if ln.strip()]
	    if not lines:
	        return None

	    # Heuristic: first non-empty line is the title, second is the author
	    title_guess = lines[0]
	    author_guess = lines[1] if len(lines) > 1 else None

	    meta = query_openlibrary(title_guess, author_guess)
	    if meta:
	        return meta

	    # No OpenLibrary match → still include the OCR result
	    return {
	        "title": title_guess,
	        "author_name": author_guess or "",
	        "publisher": "",
	        "first_publish_year": "",
	    }


	def _to_bgr(image_file):
	    """Convert a PIL image or numpy array (gray, RGB or RGBA) to a BGR array."""
	    if hasattr(image_file, "convert"):
	        # PIL image: normalise palette/gray/alpha modes to plain RGB
	        image_file = image_file.convert("RGB")

	    pixels = np.asarray(image_file)
	    if pixels.ndim == 2:
	        # Grayscale: replicate the single channel three times
	        pixels = np.stack((pixels,) * 3, axis=-1)
	    elif pixels.ndim == 3 and pixels.shape[2] == 1:
	        pixels = np.repeat(pixels, 3, axis=2)
	    elif pixels.ndim == 3 and pixels.shape[2] == 4:
	        # RGBA: drop alpha so the channel reversal below yields BGR
	        pixels = pixels[:, :, :3]

	    # Reverse RGB → BGR; copy() makes the array contiguous for OpenCV
	    return pixels[:, :, ::-1].copy()


	def process_image(image_file):
	    """
	    Detect covers → OCR → OpenLibrary lookup for one uploaded image.

	    Args:
	        image_file: A PIL image or numpy array in RGB order (grayscale and
	            RGBA inputs are accepted too), or ``None`` when nothing was
	            uploaded.

	    Returns:
	        ``(DataFrame, filepath)``: the table of detected/guessed books and
	        the path of a CSV file with the same content, written to the
	        system temp directory. If no covers are found, OCR runs once on
	        the full image. The table is empty (header only) when nothing
	        could be read or ``image_file`` is ``None``.
	    """
	    columns = ["title", "author_name", "publisher", "first_publish_year"]
	    records = []

	    if image_file is not None:
	        img = _to_bgr(image_file)

	        # 1) Try to detect individual covers
	        boxes = detect_book_regions(img)
	        if boxes:
	            ocr_texts = (ocr_on_region(img, box) for box in boxes)
	        else:
	            # 2) FALLBACK: no boxes detected → OCR on full image once
	            ocr_texts = (ocr_full_image(img),)

	        for ocr_text in ocr_texts:
	            record = _book_record_from_text(ocr_text)
	            if record is not None:
	                records.append(record)

	    # Build DataFrame (even if empty)
	    df = pd.DataFrame(records, columns=columns)

	    # Write CSV to a unique file in the system temp directory
	    unique_name = f"books_{uuid.uuid4().hex}.csv"
	    temp_path = os.path.join(tempfile.gettempdir(), unique_name)
	    df.to_csv(temp_path, index=False)

	    return df, temp_path

	# ──────────────────────────────────────────────────────────────
	# 6. Build the Gradio Interface
	# ──────────────────────────────────────────────────────────────
	def build_interface():
	    """
	    Build the Gradio Blocks UI for the scanner.

	    The layout is an intro text, an image upload plus a "Scan & Lookup"
	    button, a results table and a CSV download slot. Clicking the button
	    calls ``process_image`` on the uploaded image and routes its
	    ``(DataFrame, filepath)`` result to the table and the download slot.

	    Returns:
	        The ``gr.Blocks`` app; the caller is responsible for ``launch()``.
	    """
	    columns = ["title", "author_name", "publisher", "first_publish_year"]

	    # Assembled line by line so the nested list under step 2 does not
	    # depend on how this source file happens to be indented.
	    intro = "\n".join(
	        [
	            "## Book Cover Scanner + Metadata Lookup",
	            "",
	            "1. Upload a photo containing one or multiple book covers",
	            "2. The app will:",
	            "   - Detect individual covers (rectangles).",
	            "   - If any are found, OCR each one and query OpenLibrary for metadata.",
	            "   - If no rectangles are detected, OCR the entire image once.",
	            "3. Display all detected/guessed books in a table.",
	            "4. Download a CSV of the results.",
	            "",
	            "Tips:",
	            "- For best cover detection: use a flat, well-lit photo with minimal glare/obstructions.",
	            "- You can also place each cover on a plain background (e.g., a white tabletop).",
	        ]
	    )

	    with gr.Blocks(title="Book Cover Scanner") as demo:
	        gr.Markdown(intro)

	        with gr.Row():
	            img_in = gr.Image(type="pil", label="Upload Image of Book Covers")
	            run_button = gr.Button("Scan & Lookup")

	        output_table = gr.Dataframe(
	            headers=columns,
	            label="Detected Books + Metadata",
	            # "pandas" is a container type and belongs in `type`;
	            # `datatype` describes per-column cell types ("str", "number", ...).
	            type="pandas",
	        )
	        download_file = gr.File(label="Download CSV")

	        run_button.click(
	            fn=process_image,
	            inputs=[img_in],
	            outputs=[output_table, download_file],
	        )

	    return demo

	if __name__ == "__main__":
	    # Script entry point: build the UI and start the Gradio server
	    build_interface().launch()