# bookscanner_app / app.py
# Source: Hugging Face file viewer (uploader: ugolefoo)
# Commit: "Update app.py" (d668e84, verified)
# File size: 6.29 kB
import cv2
import numpy as np
import pytesseract
import requests
import pandas as pd
import gradio as gr
import uuid
import os
# ──────────────────────────────────────────────────────────────
# 1. OCR on the full image (always)
# ──────────────────────────────────────────────────────────────
def ocr_full_image(image: np.ndarray) -> str:
    """
    Run Tesseract OCR on the entire image (no thresholding).

    Args:
        image: Pixel array. A 3-channel array is treated as BGR and converted
            to grayscale; an array that is already single-channel, shaped
            (H, W) or (H, W, 1), is used as-is.

    Returns:
        The raw OCR text with leading and trailing whitespace removed.
    """
    if image.ndim == 2:
        # Already grayscale: cvtColor(..., COLOR_BGR2GRAY) rejects 2-D input.
        gray = image
    elif image.shape[2] == 1:
        # Single-channel image stored with an explicit channel axis.
        gray = np.ascontiguousarray(image[:, :, 0])
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Note: we're NOT thresholding here; stylized covers sometimes lose
    # detail under THRESH_OTSU.
    text = pytesseract.image_to_string(gray, config="--oem 3 --psm 6")
    return text.strip()
# ──────────────────────────────────────────────────────────────
# 2. Query OpenLibrary API
# ──────────────────────────────────────────────────────────────
def query_openlibrary(title_text: str, author_text: str = None) -> dict | None:
"""
Search OpenLibrary by title (and optional author).
Return a dict with title, author_name, publisher, first_publish_year, or None.
"""
base_url = "https://openlibrary.org/search.json"
params = {"title": title_text}
if author_text:
params["author"] = author_text
try:
resp = requests.get(base_url, params=params, timeout=5)
resp.raise_for_status()
data = resp.json()
if data.get("docs"):
doc = data["docs"][0]
return {
"title": doc.get("title", ""),
"author_name": ", ".join(doc.get("author_name", [])),
"publisher": ", ".join(doc.get("publisher", [])),
"first_publish_year": doc.get("first_publish_year", ""),
}
except Exception as e:
print(f"OpenLibrary query failed: {e}")
return None
# ──────────────────────────────────────────────────────────────
# 3. Process one uploaded image (single OCR pass)
# ──────────────────────────────────────────────────────────────
def _to_bgr(image_file) -> np.ndarray:
    """Convert a PIL image or an RGB/RGBA/grayscale array to a BGR array."""
    if hasattr(image_file, "convert"):
        # PIL image: normalise every mode (L, P, RGBA, ...) to plain RGB.
        image_file = image_file.convert("RGB")
    pixels = np.array(image_file)
    if pixels.ndim == 3 and pixels.shape[2] == 1:
        pixels = pixels[:, :, 0]
    if pixels.ndim == 2:
        # Grayscale: replicate the single channel three times.
        pixels = np.stack([pixels, pixels, pixels], axis=-1)
    # Take channels 2, 1, 0: flips RGB to BGR and drops an alpha channel.
    return pixels[:, :, 2::-1].copy()


def process_image(image_file):
    """
    OCR one uploaded cover image and look its metadata up on OpenLibrary.

    The first non-empty OCR line is taken as the title guess and the second
    (if any) as the author guess. OpenLibrary is queried once; if it has no
    match, the OCR guesses themselves are reported.

    Args:
        image_file: A PIL image or numpy array (RGB channel order), or None
            when nothing was uploaded.

    Returns:
        A ``(DataFrame, csv_path)`` tuple. The DataFrame always has the
        columns title, author_name, publisher, first_publish_year and holds
        zero or one rows; ``csv_path`` is a freshly written CSV copy of it in
        the system temporary directory.
    """
    # Local import: tempfile is not among this module's top-level imports.
    import tempfile

    columns = ["title", "author_name", "publisher", "first_publish_year"]
    records = []

    # No upload (image_file is None) yields an empty table instead of a crash.
    if image_file is not None:
        img = _to_bgr(image_file)
        # 1) Run OCR on the full image
        full_text = ocr_full_image(img)
        lines = [line.strip() for line in full_text.splitlines() if line.strip()]
        if lines:
            # Use first line as title, second (if it exists) as author
            title_guess = lines[0]
            author_guess = lines[1] if len(lines) > 1 else None
            meta = query_openlibrary(title_guess, author_guess)
            if meta:
                records.append(meta)
            else:
                # No match -> still include the OCR guesses
                records.append({
                    "title": title_guess,
                    "author_name": author_guess or "",
                    "publisher": "",
                    "first_publish_year": "",
                })

    # Build DataFrame (even if empty)
    df = pd.DataFrame(records, columns=columns)
    csv_bytes = df.to_csv(index=False).encode()

    # Write the CSV to a uniquely named file in the platform's temp directory
    unique_name = f"books_{uuid.uuid4().hex}.csv"
    temp_path = os.path.join(tempfile.gettempdir(), unique_name)
    with open(temp_path, "wb") as f:
        f.write(csv_bytes)
    return df, temp_path
# ──────────────────────────────────────────────────────────────
# 4. Build the Gradio Interface
# ──────────────────────────────────────────────────────────────
def build_interface():
    """
    Assemble the Gradio Blocks UI for single-cover scanning.

    The UI has an image upload, a "Scan & Lookup" button, a results table
    and a CSV download slot. Clicking the button runs ``process_image`` on
    the uploaded image and shows its DataFrame and CSV path.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks(title="Book Cover OCR + Lookup (Single‐Cover Mode)") as demo:
        # The Markdown text is kept flush-left so the literal carries no
        # leading indentation.
        gr.Markdown(
            """
## Book Cover OCR + OpenLibrary Lookup
1. Upload a photo of a single book cover (or any cover‐style image).
2. The app will run OCR on the full image, take:
- the **first line** as a “title” guess, and
- the **second line** (if any) as an “author” guess, then
- query OpenLibrary once for metadata.
3. You’ll see the result in a table and can download a CSV.
> **Note:**
> • Because we skip rectangle detection, any visible text on your cover (large, legible fonts) should be picked up.
> • If you have multiple covers in one photo, only the first “title/author” will be used.
"""
        )
        # NOTE(review): the upload and button are assumed to share the row,
        # with the outputs below it; confirm against the intended layout.
        with gr.Row():
            img_in = gr.Image(type="pil", label="Upload Single Book Cover")
            run_button = gr.Button("Scan & Lookup")
        output_table = gr.Dataframe(
            headers=["title", "author_name", "publisher", "first_publish_year"],
            label="Detected Book Metadata",
            # "pandas" is a value for `type` (the container format), not for
            # `datatype` (per-column types such as "str" or "number").
            type="pandas",
        )
        download_file = gr.File(label="Download CSV")

        # process_image already returns (DataFrame, csv_path) in output order.
        run_button.click(
            fn=process_image,
            inputs=[img_in],
            outputs=[output_table, download_file],
        )
    return demo
# Script entry point: build the UI and start the Gradio server.
if __name__ == "__main__":
    app = build_interface()
    app.launch()