import os
import streamlit as st
import tempfile
from pymongo import MongoClient
from datetime import datetime
from pathlib import Path
from document_chunker import DocumentChunker
from urllib.parse import quote_plus

# === MongoDB connection via Hugging Face secrets ===
def _require_env(name):
    """Return the value of environment variable *name*.

    Raises:
        RuntimeError: if the variable is unset or empty, so a missing secret
            is reported by name instead of surfacing as an opaque TypeError
            from ``quote_plus(None)`` or as a malformed connection string.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(f"Missing required environment variable: {name}")
    return value


# Credentials are percent-encoded so reserved characters (e.g. "@", ":", "/")
# in the username or password cannot corrupt the connection string.
user = quote_plus(_require_env("MONGO_USER"))
password = quote_plus(_require_env("MONGO_PASS"))
cluster = _require_env("MONGO_CLUSTER")
# Fall back to the default database when MONGO_DB is unset *or* empty.
db_name = os.environ.get("MONGO_DB") or "grant_docs"
mongo_uri = (
    f"mongodb+srv://{user}:{password}@{cluster}/{db_name}"
    "?retryWrites=true&w=majority&tls=true&tlsAllowInvalidCertificates=true"
)


# Streamlit re-executes this script on every user interaction; caching the
# client avoids opening a fresh connection pool on each rerun.
# show_spinner=False keeps the cache from emitting a UI element before
# st.set_page_config() is called further down.
@st.cache_resource(show_spinner=False)
def _get_mongo_client(uri):
    """Create the MongoClient for *uri* once and reuse it across reruns."""
    # SECURITY NOTE(review): tlsAllowInvalidCertificates=True disables server
    # certificate validation and permits man-in-the-middle attacks. Kept to
    # preserve current deployment behaviour -- confirm whether it is still
    # required and remove it (here and in the URI) if not.
    return MongoClient(
        uri,
        tls=True,
        tlsAllowInvalidCertificates=True,
        serverSelectionTimeoutMS=20000,
    )


client = _get_mongo_client(mongo_uri)
db = client[db_name]

# === Streamlit UI ===
st.set_page_config(page_title="Doc Chunker", layout="wide")
st.title("📄 Document Chunker & Uploader")

# Collection used when the database cannot be queried or has no collections yet.
DEFAULT_COLLECTION = "doc_chunks_cat"

with st.sidebar:
    st.header("Settings")

    # Fetch collection names for the dropdown. Only the database call sits
    # inside the try so unrelated UI errors are not misreported as a
    # collection-listing failure.
    try:
        existing_collections = db.list_collection_names()
    except Exception as e:
        st.error(f"Failed to list collections: {e}")
        selected_collection = DEFAULT_COLLECTION
    else:
        # An empty database would give the selectbox no options and make it
        # return None; offer the default collection instead so a valid name
        # is always selected.
        collection_options = existing_collections or [DEFAULT_COLLECTION]
        default_index = (
            collection_options.index(DEFAULT_COLLECTION)
            if DEFAULT_COLLECTION in collection_options
            else 0
        )
        selected_collection = st.selectbox(
            "Choose MongoDB Collection",
            collection_options,
            index=default_index,
        )

    # Stored on every chunk's metadata as "is_grant_app" during upload.
    is_grant_app = st.toggle("Is this a Grant Application?", value=True)

uploaded_file = st.file_uploader("Upload a DOCX or TXT file", type=["docx", "txt"])

if uploaded_file:
    st.success(f"Uploaded `{uploaded_file.name}`")

    uploaded_at = datetime.now().isoformat()
    collection = db[selected_collection]

    # Duplicate detection is by file name only: any existing chunk whose
    # metadata.title matches the uploaded name causes the upload to be skipped.
    if collection.find_one({"metadata.title": uploaded_file.name}):
        st.warning("⚠️ This file already exists in the collection. Skipping...")
    else:
        st.write("⏳ Processing with DocumentChunker...")

        # Write the upload into a private temporary directory that is removed
        # as soon as chunking finishes. Only the basename of the client-supplied
        # name is used, so it cannot escape the directory, and concurrent
        # sessions uploading same-named files cannot overwrite each other.
        with tempfile.TemporaryDirectory() as tmp_dir:
            temp_path = Path(tmp_dir) / Path(uploaded_file.name).name
            temp_path.write_bytes(uploaded_file.getbuffer())
            chunker = DocumentChunker()
            chunks = chunker.process_document(str(temp_path))

        if chunks:
            for chunk in chunks:
                # setdefault guards against a chunk without a metadata dict.
                chunk.setdefault('metadata', {}).update({
                    "title": uploaded_file.name,
                    "uploaded_at": uploaded_at,
                    "is_grant_app": is_grant_app,
                })
            # One bulk write instead of one round-trip per chunk.
            collection.insert_many(chunks)

            st.success(f"✅ {len(chunks)} chunks inserted into `{selected_collection}`")

            # Show a few previews. The chunk schema comes from DocumentChunker
            # (not visible here), so optional metadata is read defensively:
            # the data is already stored and a missing key should not crash
            # the page at this point.
            for i, c in enumerate(chunks[:3], start=1):
                meta = c['metadata']
                st.subheader(f"Chunk {i}: {meta.get('header') or 'No Header'}")
                st.markdown(c['text'][:400] + "...")
                topics = ', '.join(map(str, meta.get('topics') or []))
                st.caption(f"Topics: {topics} | Category: {meta.get('category', 'N/A')}")
                score = meta.get('confidence_score')
                if score is not None:
                    # NOTE(review): st.progress accepts 0.0-1.0 floats or 0-100
                    # ints -- confirm the range DocumentChunker emits.
                    st.progress(score)

            if len(chunks) > 3:
                st.info(f"... and {len(chunks)-3} more chunks processed.")
        else:
            st.warning("⚠️ No chunks were generated.")