import streamlit as st
from PIL import Image
import requests
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from io import BytesIO
import torch
import torchvision.transforms as T

# Page title/layout and the main heading.
st.set_page_config(page_title="WikiExplorer AR", layout="centered")
st.title("📷 WikiExplorer AR (Streamlit Edition)")

# --- Multilingual language selector ---
# Each choice is a (display label, language code) pair; the code is what the
# rest of the script passes on to the Wikipedia lookup.
LANGUAGE_CHOICES = [
    ("English", "en"),
    ("हिन्दी", "hi"),
    ("తెలుగు", "te"),
    ("தமிழ்", "ta"),
]


def _language_label(choice):
    """Return the display label of a (label, code) language choice."""
    return choice[0]


lang = st.selectbox(
    "🌐 Select Language",
    options=LANGUAGE_CHOICES,
    format_func=_language_label,
)

# The selectbox returns the chosen (label, code) pair; keep only the code.
_selected_label, lang_code = lang

# --- Load Hugging Face OCR model ---
@st.cache_resource
def load_trocr():
    """Load the TrOCR processor and model used for OCR.

    Both objects are created from the ``microsoft/trocr-base-handwritten``
    checkpoint. The function is wrapped in ``st.cache_resource``.

    Returns:
        A ``(processor, model)`` tuple.
    """
    checkpoint = "microsoft/trocr-base-handwritten"
    return (
        TrOCRProcessor.from_pretrained(checkpoint),
        VisionEncoderDecoderModel.from_pretrained(checkpoint),
    )

# Unpack the OCR pair into the module-level names the OCR helper reads.
trocr_pair = load_trocr()
processor, model = trocr_pair

# --- Camera input (main source of place name) ---
capture_prompt = "**📸 Capture a place name from signage, poster, or board:**"
st.markdown(capture_prompt)
img_file_buffer = st.camera_input("Take a picture")

# --- Optional text input if OCR fails ---
manual_entry_label = "📝 Or manually enter the place name (optional)"
place_name = st.text_input(manual_entry_label)

# --- OCR from captured image ---
def run_trocr_ocr(image_data):
    """Run TrOCR on an image and return the recognised text.

    Args:
        image_data: Anything ``PIL.Image.open`` accepts, e.g. a path or a
            binary file-like object such as ``BytesIO``.

    Returns:
        The decoded text with surrounding whitespace stripped; may be empty.
    """
    image = Image.open(image_data).convert("RGB")
    # Preprocess with the checkpoint's own processor so resizing and pixel
    # normalisation match what the model expects; a bare Resize + ToTensor
    # pipeline would skip the normalisation step.
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return text.strip()

if img_file_buffer is not None:
    st.markdown("### 📷 Captured Image")
    st.image(img_file_buffer, caption="Uploaded via camera", use_column_width=True)
    try:
        with st.spinner("🧠 Running OCR..."):
            # Give the OCR helper its own in-memory copy of the photo bytes.
            captured_bytes = BytesIO(img_file_buffer.getvalue())
            ocr_text = run_trocr_ocr(captured_bytes)
            if not ocr_text:
                st.warning("OCR ran but could not extract any meaningful text.")
            else:
                # A successful OCR read replaces whatever was typed manually.
                place_name = ocr_text
                st.success(f"🧠 OCR detected: **{place_name}**")
    except Exception as ocr_error:
        # Surface any OCR failure in the UI instead of crashing the page.
        st.error(f"OCR failed: {ocr_error}")

# --- Translation helpers ---
def translate_text(text, target_lang):
    """Translate English ``text`` into ``target_lang`` on a best-effort basis.

    The request goes to the ``translate.googleapis.com/translate_a/single``
    endpoint with the source language fixed to English.

    Args:
        text: Text to translate.
        target_lang: Target language code, e.g. ``"hi"``.

    Returns:
        The translated text, or ``text`` unchanged when it is blank, not a
        string, or the request/response handling fails.
    """
    # Nothing to translate: skip the network round trip entirely.
    if not isinstance(text, str) or not text.strip():
        return text

    try:
        response = requests.get(
            "https://translate.googleapis.com/translate_a/single",
            # Let requests build and encode the query string.
            params={
                "client": "gtx",
                "sl": "en",
                "tl": target_lang,
                "dt": "t",
                "q": text,
            },
            timeout=10,  # seconds; a stalled call must not hang the app
        )
        if response.status_code != 200:
            return text
        # The first element of the payload is a list of segments whose first
        # item is the translated piece; join them all so longer inputs are
        # not truncated to their first segment.
        segments = response.json()[0]
        translated = "".join(seg[0] for seg in segments if seg and seg[0])
    except (requests.RequestException, ValueError, TypeError, IndexError, KeyError):
        # Network failure or an unexpected payload shape: keep the original.
        return text
    return translated or text

def translate_paragraph(text, target_lang):
    sentences = text.split('. ')
    translated = []
    for sentence in sentences:
        sentence = sentence.strip()
        if sentence:
            translated_sentence = translate_text(sentence, target_lang)
            translated.append(translated_sentence)
    return '. '.join(translated)

# --- Wikipedia + Commons API ---
def get_place_info(place, lang):
    """Fetch a Wikipedia summary and Wikimedia Commons images for a place.

    The summary is requested from the ``lang`` Wikipedia. When that yields no
    extract and ``lang`` is not English, the English summary is fetched
    instead and its extract is translated with ``translate_paragraph``.

    Args:
        place: Place name as typed by the user or recognised by OCR.
        lang: Wikipedia language code such as ``"en"`` or ``"hi"``.

    Returns:
        ``{"wikipedia": <summary dict>, "commons": [{"url": ...}, ...]}``.
        ``None`` when ``place`` is blank, or when a request fails (the
        failure is then also reported through ``st.error``).
    """
    if not place or not place.strip():
        return None
    place = place.strip()

    timeout = 10  # seconds; a stalled API call must not hang the app

    try:
        # The title is part of the URL path, so percent-encode everything
        # (including "/", "?" and "#"); spaces become underscores as in
        # canonical page titles.
        title = requests.utils.quote(place.replace(" ", "_"), safe="")

        # Wikipedia API
        wiki_resp = requests.get(
            f"https://{lang}.wikipedia.org/api/rest_v1/page/summary/{title}",
            timeout=timeout,
        )
        wiki_data = wiki_resp.json() if wiki_resp.status_code == 200 else {}

        # If summary is missing and not English, try English and translate
        summary_missing = (
            not wiki_data.get("extract") or wiki_data.get("title") == "Not found."
        )
        if summary_missing and lang != "en":
            fallback_resp = requests.get(
                f"https://en.wikipedia.org/api/rest_v1/page/summary/{title}",
                timeout=timeout,
            )
            if fallback_resp.status_code == 200:
                wiki_data = fallback_resp.json()
                wiki_data["extract"] = translate_paragraph(
                    wiki_data.get("extract", ""), lang
                )

        # Wikimedia Commons. Passing params lets requests encode the search
        # text, so characters such as "&" cannot corrupt the query string.
        commons_resp = requests.get(
            "https://commons.wikimedia.org/w/api.php",
            params={
                "action": "query",
                "format": "json",
                "prop": "imageinfo",
                "generator": "search",
                "gsrsearch": place,
                # Search the File namespace; only file pages carry imageinfo.
                "gsrnamespace": 6,
                "gsrlimit": 5,
                "iiprop": "url|mime",
            },
            timeout=timeout,
        )
        commons_data = []
        if commons_resp.status_code == 200:
            pages = commons_resp.json().get("query", {}).get("pages", {})
            for page in pages.values():
                # Tolerate a missing *or empty* imageinfo list.
                imginfo = (page.get("imageinfo") or [{}])[0]
                img_url = imginfo.get("url")
                mime = imginfo.get("mime", "")
                # File pages also include PDFs, audio and video; keep images.
                if img_url and (not mime or mime.startswith("image/")):
                    commons_data.append({"url": img_url})

        return {
            "wikipedia": wiki_data,
            "commons": commons_data,
        }
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        st.error(f"❌ API request failed: {e}")
        return None

# --- Display content ---
if place_name.strip():
    st.info(f"🔍 Fetching info for **{place_name}** in **{lang_code.upper()}**...")
    data = get_place_info(place_name, lang_code)

    if not data:
        st.error("⚠️ Could not retrieve data. Check the name or try again.")
    else:
        wiki = data['wikipedia']

        st.subheader(f"📖 About {place_name}")
        # Use `or` rather than a .get() default so an extract that is present
        # but empty also falls back to the placeholder text.
        summary = wiki.get('extract') or 'No information found.'
        st.write(summary)

        if 'description' in wiki:
            st.markdown(f"**📌 Type:** _{wiki['description']}_")

        # Walk the nested keys defensively: a summary without a desktop page
        # URL simply gets no link instead of raising KeyError.
        page_url = wiki.get('content_urls', {}).get('desktop', {}).get('page')
        if page_url:
            st.markdown(f"[🔗 Full Wikipedia Page]({page_url})")

        if data['commons']:
            st.markdown("### 🖼️ Related Images")
            for img in data['commons']:
                if img and img.get('url'):
                    st.image(img['url'], width=300)
        else:
            st.warning("No images found on Wikimedia Commons.")

# --- Footer ---
# Static feature summary shown at the bottom of the page as a Markdown list.
FOOTER_MARKDOWN = """
---
- 📸 Take a picture to auto-detect monument/place using Hugging Face OCR.
- ✍️ Optional manual input if OCR fails.
- 🌐 Wikipedia multilingual summary with fallback + sentence-level translation.
- 🖼️ Commons image gallery integration.
- ✅ Works in Hugging Face Spaces with Streamlit + Transformers.
"""
st.markdown(FOOTER_MARKDOWN)