# WikiExplorer — src/streamlit_app.py
import streamlit as st
from PIL import Image
import requests
import easyocr
from io import BytesIO
# Page chrome and the multilingual language picker.
st.set_page_config(page_title="WikiExplorer AR", layout="centered")
st.title("๐Ÿ“ท WikiExplorer AR (Streamlit Edition)")

# --- Multilingual language selector ---
# (display label, ISO 639-1 code) pairs; only the label is shown to the user.
LANGUAGE_CHOICES = [
    ("English", "en"),
    ("เคนเคฟเคจเฅเคฆเฅ€", "hi"),
    ("เฐคเฑ†เฐฒเฑเฐ—เฑ", "te"),
    ("เฎคเฎฎเฎฟเฎดเฏ", "ta"),
]
lang = st.selectbox(
    "๐ŸŒ Select Language",
    options=LANGUAGE_CHOICES,
    format_func=lambda pair: pair[0],
)
lang_code = lang[1]  # ISO code used for Wikipedia/translation requests below
# --- Load OCR Model ---
@st.cache_resource
def load_ocr_model():
    """Build the easyocr reader once and reuse it across Streamlit reruns."""
    # English-only for now; append 'hi', 'te', 'ta' for multilingual OCR.
    reader = easyocr.Reader(['en'])
    return reader


ocr_reader = load_ocr_model()
# --- Place name input (optional if image is provided) ---
# place_name starts as the typed query; the OCR branch below may overwrite it
# with text detected in a captured photo.
st.markdown("**๐Ÿ“ Enter a place or person name to learn more (or capture it):**")
place_name = st.text_input("๐Ÿ›๏ธ For example: Charminar, Taj Mahal, Shah Jahan")
# --- Camera input ---
# Returns an UploadedFile-like buffer when a photo is taken, else None.
img_file_buffer = st.camera_input("๐Ÿ“ธ Take a picture (optional)")
# --- OCR from camera image ---
if img_file_buffer is not None:
    st.markdown("### ๐Ÿ“ท Captured Image")
    st.image(img_file_buffer, caption="Uploaded via camera", use_column_width=True)
    # BUG FIX: easyocr's readtext() accepts a file path, raw bytes, or a numpy
    # array — not a BytesIO wrapper. Hand it the raw JPEG bytes directly.
    image_bytes = img_file_buffer.getvalue()
    result = ocr_reader.readtext(image_bytes)
    if result:
        # Each easyocr result item is (bbox, text, confidence); keep non-blank text.
        detected_texts = [item[1] for item in result if item[1].strip()]
        if detected_texts:
            place_name = detected_texts[0]  # Top detected phrase
            st.success(f"๐Ÿง  OCR detected: **{place_name}**")
        else:
            st.warning("OCR ran but could not extract any meaningful text.")
    else:
        st.warning("Could not detect text in the image.")
# --- Translation helpers ---
def translate_text(text, target_lang):
    """Translate one English sentence via the unofficial Google endpoint.

    Args:
        text: English source sentence.
        target_lang: ISO 639-1 code of the target language.

    Returns:
        The translated string, or *text* unchanged on any failure so the
        caller always has something displayable.
    """
    try:
        url = (
            "https://translate.googleapis.com/translate_a/single"
            f"?client=gtx&sl=en&tl={target_lang}&dt=t&q={requests.utils.quote(text)}"
        )
        # Timeout so a stalled network call cannot hang the Streamlit script.
        response = requests.get(url, timeout=10)
        if response.status_code == 200:
            return response.json()[0][0][0]
    except Exception:
        # Narrowed from a bare `except:` (which also swallowed SystemExit /
        # KeyboardInterrupt); network or JSON errors fall back to the original.
        return text
    return text
def translate_paragraph(text, target_lang):
    """Translate *text* sentence by sentence and rejoin with '. '.

    Splitting on '. ' keeps each request small; blank fragments are skipped.
    """
    fragments = (chunk.strip() for chunk in text.split('. '))
    return '. '.join(
        translate_text(chunk, target_lang) for chunk in fragments if chunk
    )
# --- Wikipedia + Commons API ---
def get_place_info(place, lang):
    """Fetch a Wikipedia summary and up to 5 Wikimedia Commons image URLs.

    Falls back to the English article (translated sentence by sentence) when
    the target-language wiki has no usable summary.

    Args:
        place: Page title / search term (e.g. "Taj Mahal").
        lang: ISO 639-1 language code for the preferred Wikipedia edition.

    Returns:
        {"wikipedia": <summary dict>, "commons": [{"url": ...}, ...]},
        or None when *place* is empty or a request fails.
    """
    if not place:
        return None
    try:
        # Wikipedia REST API: the title segment must be URL-encoded, otherwise
        # titles containing spaces or punctuation fail to resolve.
        quoted_place = requests.utils.quote(place)
        wiki_url = f"https://{lang}.wikipedia.org/api/rest_v1/page/summary/{quoted_place}"
        wiki_resp = requests.get(wiki_url, timeout=10)
        wiki_data = wiki_resp.json() if wiki_resp.status_code == 200 else {}
        # If summary is missing and not English, try English and translate
        if (not wiki_data.get("extract") or wiki_data.get("title") == "Not found.") and lang != "en":
            fallback_resp = requests.get(
                f"https://en.wikipedia.org/api/rest_v1/page/summary/{quoted_place}",
                timeout=10,
            )
            if fallback_resp.status_code == 200:
                fallback_data = fallback_resp.json()
                translated_summary = translate_paragraph(fallback_data.get("extract", ""), lang)
                wiki_data = fallback_data
                wiki_data["extract"] = translated_summary
        # Wikimedia Commons image search; let requests build and encode the
        # query string instead of hand-concatenating it.
        commons_resp = requests.get(
            "https://commons.wikimedia.org/w/api.php",
            params={
                "action": "query",
                "format": "json",
                "prop": "imageinfo",
                "generator": "search",
                "gsrsearch": place,
                "gsrlimit": 5,
                "iiprop": "url",
            },
            timeout=10,
        )
        commons_data = []
        if commons_resp.status_code == 200:
            pages = commons_resp.json().get('query', {}).get('pages', {})
            for page in pages.values():
                imginfo = page.get('imageinfo', [{}])[0]
                img_url = imginfo.get('url')
                if img_url:
                    commons_data.append({"url": img_url})
        return {
            "wikipedia": wiki_data,
            "commons": commons_data,
        }
    except Exception as e:
        st.error(f"โŒ API request failed: {e}")
        return None
# --- Display content ---
# Render the summary, metadata, and image gallery for the chosen place.
if place_name.strip():
    st.info(f"๐Ÿ” Fetching info for **{place_name}** in **{lang_code.upper()}**...")
    data = get_place_info(place_name, lang_code)
    if data:
        wiki = data['wikipedia']
        st.subheader(f"๐Ÿ“– About {place_name}")
        st.write(wiki.get('extract', 'No information found.'))
        if 'description' in wiki:
            st.markdown(f"**๐Ÿ“Œ Type:** _{wiki['description']}_")
        if 'content_urls' in wiki:
            st.markdown("[๐Ÿ”— Full Wikipedia Page](%s)" % wiki['content_urls']['desktop']['page'])
        gallery = data['commons']
        if gallery:
            st.markdown("### ๐Ÿ–ผ๏ธ Related Images")
            for entry in gallery:
                if entry and entry.get('url'):
                    st.image(entry['url'], width=300)
        else:
            st.warning("No images found on Wikimedia Commons.")
    else:
        st.error("โš ๏ธ Could not retrieve data. Check the name or try again.")
# --- Footer ---
# Static feature list rendered as markdown at the bottom of the page.
st.markdown("""
---
- ๐Ÿ“Œ Supports text search and camera input.
- ๐Ÿง  OCR auto-detects place name from image.
- ๐ŸŒ Wikipedia multilingual summary with fallback + sentence-level translation.
- ๐Ÿ–ผ๏ธ Commons image gallery integration.
- โœ… Ready for Hugging Face deployment.
- ๐Ÿ› ๏ธ Streamlit only โ€” no backend needed.
""")