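"""WikiExplorer AR (Streamlit edition).

Capture a place name from signage with the camera, read it with a Hugging Face
TrOCR model, then show a multilingual Wikipedia summary (with English fallback
and sentence-level translation) plus related Wikimedia Commons images.
"""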
import streamlit as st
from PIL import Image
import requests
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from io import BytesIO
import torch
st.set_page_config(page_title="WikiExplorer AR", layout="centered")
st.title("📷 WikiExplorer AR (Streamlit Edition)")

# --- Multilingual language selector ---
lang = st.selectbox(
    "🌐 Select Language",
    options=[
        ("English", "en"),
        ("हिन्दी", "hi"),
        ("తెలుగు", "te"),
        ("தமிழ்", "ta"),
    ],
    format_func=lambda x: x[0],
)
lang_code = lang[1]
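# lang_code is the ISO 639-1 code ("en", "hi", "te", "ta") passed to the
# Wikipedia REST endpoint and the translation helper below.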
# --- Load Hugging Face OCR model ---
@st.cache_resource  # cache across reruns so the model is downloaded/loaded only once
def load_trocr():
    processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
    model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
    return processor, model

processor, model = load_trocr()
# --- Camera input (main source of place name) ---
st.markdown("**📸 Capture a place name from signage, a poster, or a board:**")
img_file_buffer = st.camera_input("Take a picture")
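# st.camera_input returns an UploadedFile once a photo is taken (None before);
# its getvalue() yields the raw image bytes that PIL opens below.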
# --- Optional text input if OCR fails ---
place_name = st.text_input("📝 Or manually enter the place name (optional)")
# --- OCR from captured image ---
def run_trocr_ocr(image_data):
    image = Image.open(image_data).convert("RGB")
    # Let the processor handle resizing and normalization; a manual
    # Resize + ToTensor would skip the per-channel normalization TrOCR expects.
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    with torch.no_grad():
        generated_ids = model.generate(pixel_values)
    text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return text.strip()
if img_file_buffer is not None:
    st.markdown("### 📷 Captured Image")
    st.image(img_file_buffer, caption="Captured via camera", use_container_width=True)
    try:
        with st.spinner("🧠 Running OCR..."):
            ocr_text = run_trocr_ocr(BytesIO(img_file_buffer.getvalue()))
        if ocr_text:
            place_name = ocr_text
            st.success(f"🧠 OCR detected: **{place_name}**")
        else:
            st.warning("OCR ran but could not extract any meaningful text.")
    except Exception as e:
        st.error(f"OCR failed: {e}")
# --- Translation helpers ---
def translate_text(text, target_lang):
    # Unofficial Google Translate endpoint: unauthenticated and rate-limited,
    # so fall back to the untranslated text on any failure.
    try:
        url = (
            "https://translate.googleapis.com/translate_a/single"
            f"?client=gtx&sl=en&tl={target_lang}&dt=t&q={requests.utils.quote(text)}"
        )
        response = requests.get(url, timeout=10)
        if response.status_code == 200:
            return response.json()[0][0][0]
    except Exception:
        return text
    return text
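# Splitting on '. ' is a naive sentence boundary (it misses abbreviations and
# other punctuation), but it keeps each translation request short.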
def translate_paragraph(text, target_lang):
    sentences = text.split('. ')
    translated = []
    for sentence in sentences:
        sentence = sentence.strip()
        if sentence:
            translated_sentence = translate_text(sentence, target_lang)
            translated.append(translated_sentence)
    return '. '.join(translated)
# --- Wikipedia + Commons API ---
def get_place_info(place, lang):
    if not place:
        return None
    try:
        # Wikipedia REST API summary (URL-encode the title: it may contain spaces)
        wiki_url = f"https://{lang}.wikipedia.org/api/rest_v1/page/summary/{requests.utils.quote(place)}"
        wiki_resp = requests.get(wiki_url, timeout=10)
        wiki_data = wiki_resp.json() if wiki_resp.status_code == 200 else {}

        # If the summary is missing and the UI language is not English,
        # try the English article and translate its extract.
        if (not wiki_data.get("extract") or wiki_data.get("title") == "Not found.") and lang != "en":
            fallback_resp = requests.get(
                f"https://en.wikipedia.org/api/rest_v1/page/summary/{requests.utils.quote(place)}",
                timeout=10,
            )
            if fallback_resp.status_code == 200:
                fallback_data = fallback_resp.json()
                translated_summary = translate_paragraph(fallback_data.get("extract", ""), lang)
                wiki_data = fallback_data
                wiki_data["extract"] = translated_summary

        # Wikimedia Commons image search (requests URL-encodes the params)
        commons_resp = requests.get(
            "https://commons.wikimedia.org/w/api.php",
            params={
                "action": "query",
                "format": "json",
                "prop": "imageinfo",
                "generator": "search",
                "gsrsearch": place,
                "gsrlimit": 5,
                "iiprop": "url",
            },
            timeout=10,
        )
        commons_data = []
        if commons_resp.status_code == 200:
            result = commons_resp.json().get('query', {}).get('pages', {})
            for page in result.values():
                imginfo = page.get('imageinfo', [{}])[0]
                img_url = imginfo.get('url')
                if img_url:
                    commons_data.append({"url": img_url})

        return {
            "wikipedia": wiki_data,
            "commons": commons_data,
        }
    except Exception as e:
        st.error(f"❌ API request failed: {e}")
        return None
# --- Display content ---
if place_name.strip():
    st.info(f"🔍 Fetching info for **{place_name}** in **{lang_code.upper()}**...")
    data = get_place_info(place_name, lang_code)
    if not data:
        st.error("⚠️ Could not retrieve data. Check the name or try again.")
    else:
        st.subheader(f"📖 About {place_name}")
        summary = data['wikipedia'].get('extract', 'No information found.')
        st.write(summary)
        if 'description' in data['wikipedia']:
            st.markdown(f"**📌 Type:** _{data['wikipedia']['description']}_")
        if 'content_urls' in data['wikipedia']:
            st.markdown("[🔗 Full Wikipedia Page](%s)" % data['wikipedia']['content_urls']['desktop']['page'])
        if data['commons']:
            st.markdown("### 🖼️ Related Images")
            for img in data['commons']:
                if img.get('url'):
                    st.image(img['url'], width=300)
        else:
            st.warning("No images found on Wikimedia Commons.")
# --- Footer ---
st.markdown("""
---
- 📸 Take a picture to auto-detect a monument or place name with Hugging Face TrOCR.
- ✍️ Optional manual input if OCR fails.
- 🌐 Multilingual Wikipedia summary with English fallback + sentence-level translation.
- 🖼️ Wikimedia Commons image gallery integration.
- ✅ Runs on Hugging Face Spaces with Streamlit + Transformers.
""")