"""WikiExplorer AR (Streamlit edition).

Captures a photo of a place name, reads it with a TrOCR model, then shows the
Wikipedia summary (with an English-to-target-language translation fallback)
and related Wikimedia Commons images for that place.
"""

from io import BytesIO
from urllib.parse import quote

import requests
import streamlit as st
import torch
# No longer used by the OCR path (the TrOCR processor does the preprocessing);
# the import is kept so nothing else relying on it breaks.
import torchvision.transforms as T  # noqa: F401
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

# Seconds to wait on any single HTTP call; without a timeout a stalled
# connection would hang the whole Streamlit script run.
REQUEST_TIMEOUT = 10

# Wikimedia's API policy asks clients to send a descriptive User-Agent.
# TODO: add a contact URL or e-mail address for the deployed app.
WIKIMEDIA_HEADERS = {"User-Agent": "WikiExplorerAR/1.0 (Streamlit demo app)"}

TRANSLATE_URL = "https://translate.googleapis.com/translate_a/single"
COMMONS_API_URL = "https://commons.wikimedia.org/w/api.php"

st.set_page_config(page_title="WikiExplorer AR", layout="centered")
st.title("📷 WikiExplorer AR (Streamlit Edition)")

# --- Multilingual language selector ---
lang = st.selectbox(
    "🌐 Select Language",
    options=[
        ("English", "en"),
        ("हिन्दी", "hi"),
        ("తెలుగు", "te"),
        ("தமிழ்", "ta"),
    ],
    format_func=lambda x: x[0],
)
lang_code = lang[1]


# --- Load Hugging Face OCR model ---
@st.cache_resource
def load_trocr():
    """Load the TrOCR processor and model once per server process.

    Returns:
        A ``(processor, model)`` tuple.
    """
    # NOTE(review): this is the *handwritten* checkpoint; for printed signage
    # "microsoft/trocr-base-printed" may read better -- confirm before switching.
    processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
    model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
    return processor, model


processor, model = load_trocr()

# --- Camera input (main source of place name) ---
st.markdown("**📸 Capture a place name from signage, poster, or board:**")
img_file_buffer = st.camera_input("Take a picture")

# --- Optional text input if OCR fails ---
place_name = st.text_input("📝 Or manually enter the place name (optional)")


# --- OCR from captured image ---
def run_trocr_ocr(image_data):
    """Run TrOCR on an image and return the recognised text.

    Args:
        image_data: A path or binary file-like object readable by
            ``PIL.Image.open``.

    Returns:
        The decoded text with surrounding whitespace stripped (may be empty).
    """
    image = Image.open(image_data).convert("RGB")
    # Let the processor resize AND normalise the image; a bare
    # Resize + ToTensor skips the normalisation step the processor applies.
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    # Inference only: no autograd bookkeeping needed.
    with torch.no_grad():
        generated_ids = model.generate(pixel_values)
    text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return text.strip()


if img_file_buffer is not None:
    st.markdown("### 📷 Captured Image")
    st.image(img_file_buffer, caption="Uploaded via camera", use_column_width=True)

    try:
        with st.spinner("🧠 Running OCR..."):
            ocr_text = run_trocr_ocr(BytesIO(img_file_buffer.getvalue()))
        if ocr_text:
            st.success(f"🧠 OCR detected: **{ocr_text}**")
            # The captured image persists across reruns, so OCR runs every
            # time; only use its result when the user has not typed a name,
            # otherwise a wrong OCR read could never be corrected manually.
            if not place_name.strip():
                place_name = ocr_text
        else:
            st.warning("OCR ran but could not extract any meaningful text.")
    except Exception as e:  # UI boundary: report any OCR failure, don't crash
        st.error(f"OCR failed: {e}")


# --- Translation helpers ---
def translate_text(text, target_lang):
    """Translate English ``text`` into ``target_lang`` (best effort).

    Args:
        text: English source text.
        target_lang: Target language code, e.g. ``"hi"``.

    Returns:
        The translated text, or ``text`` unchanged if the request fails, the
        service answers with a non-200 status, or the response cannot be
        parsed.
    """
    if not text or not text.strip():
        return text
    try:
        response = requests.get(
            TRANSLATE_URL,
            params={"client": "gtx", "sl": "en", "tl": target_lang, "dt": "t", "q": text},
            timeout=REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            return text
        # The first element is a list of [translated, original, ...] segments;
        # join them all instead of keeping only the first one.
        segments = response.json()[0]
        translated = "".join(seg[0] for seg in segments if seg and seg[0])
    except (requests.RequestException, ValueError, LookupError, TypeError):
        return text
    return translated or text


def translate_paragraph(text, target_lang):
    """Translate a paragraph sentence by sentence.

    The text is split on ``". "``, each non-empty sentence is translated with
    ``translate_text``, and the results are re-joined with ``". "``.
    """
    sentences = (sentence.strip() for sentence in text.split(". "))
    translated = [translate_text(sentence, target_lang) for sentence in sentences if sentence]
    return ". ".join(translated)


# --- Wikipedia + Commons API ---
def _fetch_summary(title, lang):
    """Return the Wikipedia REST summary JSON for ``title``, or ``{}`` on non-200."""
    # safe="" so that "/", "?", "#" etc. in a title cannot alter the URL path.
    url = f"https://{lang}.wikipedia.org/api/rest_v1/page/summary/{quote(title, safe='')}"
    resp = requests.get(url, headers=WIKIMEDIA_HEADERS, timeout=REQUEST_TIMEOUT)
    return resp.json() if resp.status_code == 200 else {}


def _fetch_commons_images(query, limit=5):
    """Return up to ``limit`` ``{"url": ...}`` dicts for Commons files matching ``query``."""
    params = {
        "action": "query",
        "format": "json",
        "prop": "imageinfo",
        "generator": "search",
        "gsrsearch": query,
        # Search the File: namespace; the default (0) returns gallery pages,
        # which carry no imageinfo.
        "gsrnamespace": 6,
        "gsrlimit": limit,
        "iiprop": "url|mime",
    }
    resp = requests.get(
        COMMONS_API_URL, params=params, headers=WIKIMEDIA_HEADERS, timeout=REQUEST_TIMEOUT
    )
    if resp.status_code != 200:
        return []

    pages = resp.json().get("query", {}).get("pages", {})
    images = []
    for page in pages.values():
        # "imageinfo" may be missing or an empty list.
        info = (page.get("imageinfo") or [{}])[0]
        img_url = info.get("url")
        # Skip audio/video/PDF files, which st.image cannot render.
        if img_url and info.get("mime", "image/").startswith("image/"):
            images.append({"url": img_url})
    return images


def get_place_info(place, lang):
    """Fetch the Wikipedia summary and Commons images for a place.

    Args:
        place: Place name to look up.
        lang: Wikipedia language code, e.g. ``"en"``.

    Returns:
        ``{"wikipedia": <summary dict>, "commons": [{"url": ...}, ...]}``, or
        ``None`` if ``place`` is empty or a request fails (the failure is
        reported with ``st.error``).
    """
    place = place.strip() if place else ""
    if not place:
        return None

    try:
        wiki_data = _fetch_summary(place, lang)

        # If the summary is missing and not English, try English and translate.
        summary_missing = (
            not wiki_data.get("extract") or wiki_data.get("title") == "Not found."
        )
        if summary_missing and lang != "en":
            fallback_data = _fetch_summary(place, "en")
            if fallback_data:
                fallback_data["extract"] = translate_paragraph(
                    fallback_data.get("extract", ""), lang
                )
                wiki_data = fallback_data

        return {
            "wikipedia": wiki_data,
            "commons": _fetch_commons_images(place),
        }
    except (requests.RequestException, ValueError) as e:
        st.error(f"❌ API request failed: {e}")
        return None


# --- Display content ---
if place_name.strip():
    st.info(f"🔍 Fetching info for **{place_name}** in **{lang_code.upper()}**...")

    data = get_place_info(place_name, lang_code)

    if not data:
        st.error("⚠️ Could not retrieve data. Check the name or try again.")
    else:
        wiki = data["wikipedia"]

        st.subheader(f"📖 About {place_name}")
        # `or` so that an empty extract also shows the placeholder.
        st.write(wiki.get("extract") or "No information found.")

        if "description" in wiki:
            st.markdown(f"**📌 Type:** _{wiki['description']}_")

        page_url = wiki.get("content_urls", {}).get("desktop", {}).get("page")
        if page_url:
            st.markdown(f"[🔗 Full Wikipedia Page]({page_url})")

        if data["commons"]:
            st.markdown("### 🖼️ Related Images")
            for img in data["commons"]:
                if img and img.get("url"):
                    st.image(img["url"], width=300)
        else:
            st.warning("No images found on Wikimedia Commons.")

# --- Footer ---
st.markdown("""
---
- 📸 Take a picture to auto-detect monument/place using Hugging Face OCR.
- ✍️ Optional manual input if OCR fails.
- 🌐 Wikipedia multilingual summary with fallback + sentence-level translation.
- 🖼️ Commons image gallery integration.
- ✅ Works in Hugging Face Spaces with Streamlit + Transformers.
""")