# WikiExplorer — src/streamlit_app.py
import streamlit as st
from PIL import Image
import requests
import easyocr
from io import BytesIO
# Page chrome and the multilingual language picker.
st.set_page_config(page_title="WikiExplorer AR", layout="centered")
st.title("๐Ÿ“ท WikiExplorer AR (Streamlit Edition)")

# --- Multilingual language selector ---
# (display label, ISO 639-1 code) pairs; only the label is shown to the user.
LANGUAGE_CHOICES = [
    ("English", "en"),
    ("เคนเคฟเคจเฅเคฆเฅ€", "hi"),
    ("เฐคเฑ†เฐฒเฑเฐ—เฑ", "te"),
    ("เฎคเฎฎเฎฟเฎดเฏ", "ta"),
]
lang = st.selectbox(
    "๐ŸŒ Select Language",
    options=LANGUAGE_CHOICES,
    format_func=lambda pair: pair[0],
)
lang_code = lang[1]  # ISO code used for Wikipedia/translation requests below
# --- Load OCR Model ---
@st.cache_resource
def load_ocr_model():
    """Build the easyocr reader once and reuse it across Streamlit reruns."""
    # English-only for now; append 'hi', 'te', 'ta' for multilingual OCR.
    reader = easyocr.Reader(['en'])
    return reader


ocr_reader = load_ocr_model()
# --- Place name input (optional if image is provided) ---
# place_name starts as the typed query; the OCR branch below may overwrite it
# with text detected in a captured photo.
st.markdown("**๐Ÿ“ Enter a place or person name to learn more (or capture it):**")
place_name = st.text_input("๐Ÿ›๏ธ For example: Charminar, Taj Mahal, Shah Jahan")
# --- Camera input ---
# Returns an UploadedFile-like buffer when a photo is taken, else None.
img_file_buffer = st.camera_input("๐Ÿ“ธ Take a picture (optional)")
# --- OCR from camera image ---
if img_file_buffer is not None:
    st.markdown("### ๐Ÿ“ท Captured Image")
    st.image(img_file_buffer, caption="Uploaded via camera", use_column_width=True)
    # BUG FIX: easyocr's readtext() accepts a file path, raw bytes, or a numpy
    # array — not a BytesIO wrapper. Hand it the raw JPEG bytes directly.
    image_bytes = img_file_buffer.getvalue()
    result = ocr_reader.readtext(image_bytes)
    if result:
        # Each easyocr result item is (bbox, text, confidence); keep non-blank text.
        detected_texts = [item[1] for item in result if item[1].strip()]
        if detected_texts:
            place_name = detected_texts[0]  # Top detected phrase
            st.success(f"๐Ÿง  OCR detected: **{place_name}**")
        else:
            st.warning("OCR ran but could not extract any meaningful text.")
    else:
        st.warning("Could not detect text in the image.")
# --- Translation helpers ---
def translate_text(text, target_lang):
    """Translate one English sentence via the unofficial Google endpoint.

    Args:
        text: English source sentence.
        target_lang: ISO 639-1 code of the target language.

    Returns:
        The translated string, or *text* unchanged on any failure so the
        caller always has something displayable.
    """
    try:
        url = (
            "https://translate.googleapis.com/translate_a/single"
            f"?client=gtx&sl=en&tl={target_lang}&dt=t&q={requests.utils.quote(text)}"
        )
        # Timeout so a stalled network call cannot hang the Streamlit script.
        response = requests.get(url, timeout=10)
        if response.status_code == 200:
            return response.json()[0][0][0]
    except Exception:
        # Narrowed from a bare `except:` (which also swallowed SystemExit /
        # KeyboardInterrupt); network or JSON errors fall back to the original.
        return text
    return text
def translate_paragraph(text, target_lang):
    """Translate *text* sentence by sentence and rejoin with '. '.

    Splitting on '. ' keeps each request small; blank fragments are skipped.
    """
    fragments = (chunk.strip() for chunk in text.split('. '))
    return '. '.join(
        translate_text(chunk, target_lang) for chunk in fragments if chunk
    )
# --- Wikipedia + Commons API ---
def get_place_info(place, lang):
    """Fetch a Wikipedia summary and up to 5 Wikimedia Commons image URLs.

    Falls back to the English article (translated sentence by sentence) when
    the target-language wiki has no usable summary.

    Args:
        place: Page title / search term (e.g. "Taj Mahal").
        lang: ISO 639-1 language code for the preferred Wikipedia edition.

    Returns:
        {"wikipedia": <summary dict>, "commons": [{"url": ...}, ...]},
        or None when *place* is empty or a request fails.
    """
    if not place:
        return None
    try:
        # Wikipedia REST API: the title segment must be URL-encoded, otherwise
        # titles containing spaces or punctuation fail to resolve.
        quoted_place = requests.utils.quote(place)
        wiki_url = f"https://{lang}.wikipedia.org/api/rest_v1/page/summary/{quoted_place}"
        wiki_resp = requests.get(wiki_url, timeout=10)
        wiki_data = wiki_resp.json() if wiki_resp.status_code == 200 else {}
        # If summary is missing and not English, try English and translate
        if (not wiki_data.get("extract") or wiki_data.get("title") == "Not found.") and lang != "en":
            fallback_resp = requests.get(
                f"https://en.wikipedia.org/api/rest_v1/page/summary/{quoted_place}",
                timeout=10,
            )
            if fallback_resp.status_code == 200:
                fallback_data = fallback_resp.json()
                translated_summary = translate_paragraph(fallback_data.get("extract", ""), lang)
                wiki_data = fallback_data
                wiki_data["extract"] = translated_summary
        # Wikimedia Commons image search; let requests build and encode the
        # query string instead of hand-concatenating it.
        commons_resp = requests.get(
            "https://commons.wikimedia.org/w/api.php",
            params={
                "action": "query",
                "format": "json",
                "prop": "imageinfo",
                "generator": "search",
                "gsrsearch": place,
                "gsrlimit": 5,
                "iiprop": "url",
            },
            timeout=10,
        )
        commons_data = []
        if commons_resp.status_code == 200:
            pages = commons_resp.json().get('query', {}).get('pages', {})
            for page in pages.values():
                imginfo = page.get('imageinfo', [{}])[0]
                img_url = imginfo.get('url')
                if img_url:
                    commons_data.append({"url": img_url})
        return {
            "wikipedia": wiki_data,
            "commons": commons_data,
        }
    except Exception as e:
        st.error(f"โŒ API request failed: {e}")
        return None
# --- Display content ---
# Render the summary, metadata, and image gallery for the chosen place.
if place_name.strip():
    st.info(f"๐Ÿ” Fetching info for **{place_name}** in **{lang_code.upper()}**...")
    data = get_place_info(place_name, lang_code)
    if data:
        wiki = data['wikipedia']
        st.subheader(f"๐Ÿ“– About {place_name}")
        st.write(wiki.get('extract', 'No information found.'))
        if 'description' in wiki:
            st.markdown(f"**๐Ÿ“Œ Type:** _{wiki['description']}_")
        if 'content_urls' in wiki:
            st.markdown("[๐Ÿ”— Full Wikipedia Page](%s)" % wiki['content_urls']['desktop']['page'])
        gallery = data['commons']
        if gallery:
            st.markdown("### ๐Ÿ–ผ๏ธ Related Images")
            for entry in gallery:
                if entry and entry.get('url'):
                    st.image(entry['url'], width=300)
        else:
            st.warning("No images found on Wikimedia Commons.")
    else:
        st.error("โš ๏ธ Could not retrieve data. Check the name or try again.")
# --- Footer ---
# Static feature list rendered as markdown at the bottom of the page.
st.markdown("""
---
- ๐Ÿ“Œ Supports text search and camera input.
- ๐Ÿง  OCR auto-detects place name from image.
- ๐ŸŒ Wikipedia multilingual summary with fallback + sentence-level translation.
- ๐Ÿ–ผ๏ธ Commons image gallery integration.
- โœ… Ready for Hugging Face deployment.
- ๐Ÿ› ๏ธ Streamlit only โ€” no backend needed.
""")