Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -54,6 +54,115 @@ def ocr_on_region(image: np.ndarray, box: tuple):
|
|
| 54 |
Return the raw OCR text.
|
| 55 |
"""
|
| 56 |
x, y, w, h = box
|
| 57 |
-
cropped = image[y:y+h, x:x+w]
|
| 58 |
gray_crop = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
|
| 59 |
-
_, thresh_crop = cv2.threshold(gray_crop, 0, 255, cv2.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
Return the raw OCR text.
|
| 55 |
"""
|
| 56 |
x, y, w, h = box
|
| 57 |
+
cropped = image[y:y + h, x:x + w]
|
| 58 |
gray_crop = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
|
| 59 |
+
_, thresh_crop = cv2.threshold(gray_crop, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
|
| 60 |
+
custom_config = r'--oem 3 --psm 6'
|
| 61 |
+
text = pytesseract.image_to_string(thresh_crop, config=custom_config)
|
| 62 |
+
return text.strip()
|
| 63 |
+
|
| 64 |
+
# ──────────────────────────────────────────────────────────────
|
| 65 |
+
# 3. Query OpenLibrary API
|
| 66 |
+
# ──────────────────────────────────────────────────────────────
|
| 67 |
+
def query_openlibrary(title_text: str, author_text: str = None):
    """
    Search OpenLibrary by title (and optional author).

    Only the first search hit is used, so the request asks the API for a
    single result and for just the fields that are read below.

    Args:
        title_text: Title to search for (typically a raw OCR line).
        author_text: Optional author string used to narrow the search.

    Returns:
        A dict with the keys ``title``, ``author_name``, ``publisher`` and
        ``first_publish_year`` for the first hit, or ``None`` when the title
        is blank, nothing matched, or the lookup failed.
    """
    # A blank title cannot match anything useful; skip the network round trip.
    if not title_text or not title_text.strip():
        return None

    base_url = "https://openlibrary.org/search.json"
    params = {
        "title": title_text,
        # Request the fields we read explicitly rather than relying on the
        # API's default field set, and fetch one hit instead of a full page.
        "fields": "title,author_name,publisher,first_publish_year",
        "limit": 1,
    }
    if author_text:
        params["author"] = author_text

    try:
        resp = requests.get(base_url, params=params, timeout=5)
        resp.raise_for_status()
        docs = resp.json().get("docs")
        if docs:
            doc = docs[0]
            # `or []` also covers a key that is present but null in the JSON.
            authors = doc.get("author_name") or []
            publishers = doc.get("publisher") or []
            return {
                "title": doc.get("title", ""),
                "author_name": ", ".join(map(str, authors)),
                "publisher": ", ".join(map(str, publishers)),
                "first_publish_year": doc.get("first_publish_year", ""),
            }
    except Exception as e:
        # Deliberate best-effort boundary: a failed lookup must not abort the
        # scan, so report the problem and fall through to "no metadata".
        print(f"OpenLibrary query failed: {e}")

    return None
|
| 93 |
+
|
| 94 |
+
# ──────────────────────────────────────────────────────────────
|
| 95 |
+
# 4. Process one uploaded image
|
| 96 |
+
# ──────────────────────────────────────────────────────────────
|
| 97 |
+
def process_image(image_file):
    """
    Run the full pipeline on one uploaded image: detect covers -> OCR -> OpenLibrary.

    Args:
        image_file: A PIL image or a numpy array in RGB channel order, or
            ``None`` when nothing was uploaded.

    Returns:
        A ``(DataFrame, bytes)`` tuple. The DataFrame has the columns
        ``title``, ``author_name``, ``publisher`` and ``first_publish_year``
        (one row per region that produced OCR text); the bytes are the same
        table encoded as CSV. Both are empty (header only) when there is no
        input image or no text was recognised.
    """
    columns = ["title", "author_name", "publisher", "first_publish_year"]

    # No upload: return an empty table instead of failing on the array slicing below.
    if image_file is None:
        empty = pd.DataFrame(columns=columns)
        return empty, empty.to_csv(index=False).encode()

    # Normalise PIL inputs (palette, grayscale, RGBA, ...) to 3-channel RGB.
    if hasattr(image_file, "convert"):
        image_file = image_file.convert("RGB")
    rgb = np.asarray(image_file)
    if rgb.ndim == 2:
        # Grayscale array: replicate the single channel three times.
        rgb = np.stack((rgb,) * 3, axis=-1)
    elif rgb.shape[2] == 4:
        # RGBA array: drop alpha so the reversal below yields BGR, not ABGR.
        rgb = rgb[:, :, :3]
    img = rgb[:, :, ::-1].copy()  # RGB -> OpenCV BGR

    records = []
    for box in detect_book_regions(img):
        ocr_text = ocr_on_region(img, box)
        stripped = (ln.strip() for ln in ocr_text.splitlines())
        lines = [ln for ln in stripped if ln]
        if not lines:
            continue

        # Heuristic: first non-blank OCR line is the title, second the author.
        title_guess = lines[0]
        author_guess = lines[1] if len(lines) > 1 else None
        meta = query_openlibrary(title_guess, author_guess)

        # Fall back to the raw OCR guesses when the lookup found nothing.
        records.append(meta or {
            "title": title_guess,
            "author_name": author_guess or "",
            "publisher": "",
            "first_publish_year": "",
        })

    # Passing `columns` keeps the header stable even when `records` is empty.
    df = pd.DataFrame(records, columns=columns)
    csv_bytes = df.to_csv(index=False).encode()
    return df, csv_bytes
|
| 133 |
+
|
| 134 |
+
# ──────────────────────────────────────────────────────────────
|
| 135 |
+
# 5. Build the Gradio Interface
|
| 136 |
+
# ──────────────────────────────────────────────────────────────
|
| 137 |
+
def build_interface():
    """
    Build the Gradio Blocks UI for the book cover scanner.

    The layout is an image upload plus a "Scan & Lookup" button, a results
    table, and a file output offering the results as a CSV download.

    Returns:
        The ``gr.Blocks`` app; the caller is responsible for launching it.
    """
    import tempfile  # local import: only needed for the CSV download file

    with gr.Blocks(title="Book Cover Scanner") as demo:
        gr.Markdown(
            """
            ## Book Cover Scanner + Metadata Lookup
            1. Upload a photo containing one or multiple book covers
            2. The app will detect each cover, run OCR, then query OpenLibrary for metadata
            3. Results appear in a table below, and you can download a CSV
            """
        )

        with gr.Row():
            img_in = gr.Image(type="pil", label="Upload Image of Book Covers")
            run_button = gr.Button("Scan & Lookup")

        output_table = gr.Dataframe(
            headers=["title", "author_name", "publisher", "first_publish_year"],
            label="Detected Books with Metadata"
        )
        # Gradio has no `gr.Download` component; `gr.File` is the output
        # component that offers a file for download.
        download_file = gr.File(label="Download CSV")

        def on_run(image):
            df, csv_bytes = process_image(image)
            # gr.File serves a file from disk, so persist the CSV bytes to a
            # temporary file and hand its path to the component.
            with tempfile.NamedTemporaryFile(
                mode="wb", suffix=".csv", delete=False
            ) as fh:
                fh.write(csv_bytes)
            return df, fh.name

        run_button.click(fn=on_run, inputs=[img_in], outputs=[output_table, download_file])

    return demo
|
| 165 |
+
|
| 166 |
+
if __name__ == "__main__":
    # Script entry point: build the UI and start the Gradio server.
    build_interface().launch()
|