import os
import PyPDF2
from PyPDF2 import PdfReader
import pandas as pd

## Embedding model
from langchain_huggingface import HuggingFaceEmbeddings

embed_model = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")

folder_path = "./"
context_data = []

# List all files in the folder
files = os.listdir(folder_path)

# Get list of CSV and Excel files
data_files = [f for f in files if f.endswith(('.csv', '.xlsx', '.xls'))]

# Process each file
for idx, file in enumerate(data_files, 1):
    print(f"\nProcessing file {idx}: {file}")
    file_path = os.path.join(folder_path, file)
    try:
        # Read the file based on its extension
        if file.endswith('.csv'):
            df = pd.read_csv(file_path)
        else:
            df = pd.read_excel(file_path)

        # Extract non-empty values from the column at index 2 and append them
        context_data.extend(df.iloc[:, 2].dropna().astype(str).tolist())
    except Exception as e:
        print(f"Error processing file {file}: {str(e)}")


import os
import PyPDF2
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document


def extract_text_from_pdf(pdf_path):
    """Extract text from a PDF file."""
    try:
        with open(pdf_path, "rb") as file:
            reader = PyPDF2.PdfReader(file)
            return "".join(page.extract_text() or "" for page in reader.pages)
    except Exception as e:
        print(f"Error with {pdf_path}: {e}")
        return ""


pdf_files = [f for f in files if f.lower().endswith(".pdf")]

# Process PDFs
documents = []
for file in pdf_files:
    print(f"Processing: {file}")
    pdf_path = os.path.join(folder_path, file)
    text = extract_text_from_pdf(pdf_path)
    if text:
        documents.append(Document(page_content=text, metadata={"source": file}))

# Split into chunks
text_splitter = RecursiveCharacterTextSplitter(
    separators=['\n\n', '\n', '.', ','],
    chunk_size=500,
    chunk_overlap=50
)
chunks = text_splitter.split_documents(documents)
text_only_chunks = [chunk.page_content for chunk in chunks]


from urllib.parse import urljoin, urlparse
import requests
from io import BytesIO
from bs4 import BeautifulSoup
from langchain_core.prompts import ChatPromptTemplate
import gradio as gr


def scrape_websites(base_urls):
    try:
        visited_links = set()  # To avoid revisiting the same link
        content_by_url = {}    # Store content from each URL

        for base_url in base_urls:
            if not base_url.strip():
                continue  # Skip empty or invalid URLs

            print(f"Scraping base URL: {base_url}")
            html_content = fetch_page_content(base_url)
            if html_content:
                cleaned_content = clean_body_content(html_content)
                content_by_url[base_url] = cleaned_content
                visited_links.add(base_url)

                # Extract and process all internal links
                soup = BeautifulSoup(html_content, "html.parser")
                links = extract_internal_links(base_url, soup)

                for link in links:
                    if link not in visited_links:
                        print(f"Scraping link: {link}")
                        page_content = fetch_page_content(link)
                        if page_content:
                            cleaned_content = clean_body_content(page_content)
                            content_by_url[link] = cleaned_content
                            visited_links.add(link)

                        # If the link is a PDF file, extract its content
                        if link.lower().endswith('.pdf'):
                            print(f"Extracting PDF content from: {link}")
                            pdf_content = extract_pdf_text(link)
                            if pdf_content:
                                content_by_url[link] = pdf_content

        return content_by_url
    except Exception as e:
        print(f"Error during scraping: {e}")
        return {}


def fetch_page_content(url):
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        return response.text
    except requests.exceptions.RequestException as e:
        print(f"Error fetching {url}: {e}")
        return None


def extract_internal_links(base_url, soup):
    links = set()
    for anchor in soup.find_all("a", href=True):
        href = anchor["href"]
        full_url = urljoin(base_url, href)
        if is_internal_link(base_url, full_url):
            links.add(full_url)
    return links


def is_internal_link(base_url, link_url):
    base_netloc = urlparse(base_url).netloc
    link_netloc = urlparse(link_url).netloc
    return base_netloc == link_netloc


def extract_pdf_text(pdf_url):
    try:
        response = requests.get(pdf_url)
        response.raise_for_status()

        # Open the PDF from the response content
        with BytesIO(response.content) as file:
            reader = PdfReader(file)
            pdf_text = ""
            for page in reader.pages:
                pdf_text += page.extract_text() or ""

        return pdf_text if pdf_text else None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching PDF {pdf_url}: {e}")
        return None
    except Exception as e:
        print(f"Error reading PDF {pdf_url}: {e}")
        return None


def clean_body_content(html_content):
    soup = BeautifulSoup(html_content, "html.parser")

    # Remove scripts and styles
    for script_or_style in soup(["script", "style"]):
        script_or_style.extract()

    # Get text and clean up
    cleaned_content = soup.get_text(separator="\n")
    cleaned_content = "\n".join(
        line.strip() for line in cleaned_content.splitlines() if line.strip()
    )
    return cleaned_content


# if __name__ == "__main__":
#     website = [
#         # "https://www.rib.gov.rw/index.php?id=371",
#         "https://haguruka.org.rw/our-work/"
#     ]
#     all_content = scrape_websites(website)
#
#     # Temporary list to store (url, content) tuples
#     temp_list = []
#
#     # Process and store each URL with its content
#     for url, content in all_content.items():
#         temp_list.append((url, content))
#
#     processed_texts = []
#
#     # Process each element in the temporary list
#     for element in temp_list:
#         if isinstance(element, tuple):
#             url, content = element  # Unpack the tuple
#             processed_texts.append(f"url: {url}, content: {content}")
#         elif isinstance(element, str):
#             processed_texts.append(element)
#         else:
#             processed_texts.append(str(element))
#
#     def chunk_string(s, chunk_size=2000):
#         return [s[i:i+chunk_size] for i in range(0, len(s), chunk_size)]
#
#     # List to store the chunks
#     chunked_texts = []
#     for text in processed_texts:
#         chunked_texts.extend(chunk_string(text))


data = []
data.extend(context_data)
# data.extend([item for item in text_only_chunks if item not in data])
# data.extend([item for item in chunked_texts if item not in data])

# from langchain_community.vectorstores import Chroma
from langchain_chroma import Chroma

vectorstore = Chroma(
    collection_name="GBV_data_set",
    embedding_function=embed_model,
)
vectorstore.get().keys()

# Add the collected data to the vector store
vectorstore.add_texts(data)
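
# Optional sanity check (illustrative sketch, not part of the original pipeline):
# query the freshly populated collection to confirm that relevant chunks come back.
# The query string below is only an example.
# sample_hits = vectorstore.similarity_search("What support is available for GBV survivors?", k=3)
# for hit in sample_hits:
#     print(hit.page_content[:150])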
api = os.environ.get('V1')

from openai import OpenAI
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
import gradio as gr
from typing import Iterator
import time

# Prompt template for the GBV support chatbot
template = ("""
You are a compassionate and supportive AI assistant specializing in helping individuals affected by Gender-Based Violence (GBV). Your primary goal is to provide emotionally intelligent support while maintaining appropriate boundaries.

You are a conversational AI. Respond directly and naturally to the user's input without displaying any system messages, backend processes, or 'thinking...' responses. Only provide the final response in a human-like and engaging manner.

When responding, follow these guidelines:

1. **Emotional Intelligence**
   - Validate feelings without judgment (e.g., "It is completely understandable to feel this way")
   - Offer reassurance when appropriate, always centered on empowerment
   - Adjust your tone based on the emotional state conveyed

2. **Personalized Communication**
   - Avoid contractions (e.g., use "I am" instead of "I'm")
   - Incorporate thoughtful pauses or reflective questions when the conversation involves difficult topics
   - Use selective emojis (😊, 🤗, ❤️) only when tone-appropriate and not during crisis discussions
   - Balance warmth with professionalism

3. **Conversation Management**
   - Refer to {conversation_history} to maintain continuity and avoid repetition
   - Keep responses concise unless greater detail is explicitly requested
   - Use clear paragraph breaks for readability
   - Prioritize immediate concerns before addressing secondary issues

4. **Information Delivery**
   - Extract only relevant information from {context} that directly addresses the question
   - Present information in accessible, non-technical language
   - Organize resource recommendations in order of relevance and accessibility
   - Provide links [URL] only when specifically requested, prefaced with clear descriptions
   - When information is unavailable, respond with: "I don't have that specific information right now, {first_name}. Would it be helpful if I focus on [alternative support option]?"

5. **Safety and Ethics**
   - Prioritize user safety in all responses
   - Never generate speculative content about their specific situation
   - Avoid phrases that could minimize experiences or create pressure
   - Include gentle reminders about professional help when discussing serious issues

Your response should balance emotional support with practical guidance.

**Context:** {context}

**User's Question:** {question}

**Your Response:**
""")

rag_prompt = PromptTemplate.from_template(template)

retriever = vectorstore.as_retriever()


import requests

API_TOKEN = os.environ.get('Token')
model_name = "facebook/nllb-200-distilled-600M"
url = f"https://api-inference.huggingface.co/models/{model_name}"
headers = {"Authorization": f"Bearer {API_TOKEN}"}


def translate_text(text, src_lang, tgt_lang):
    """Translate text using the Hugging Face Inference API."""
    response = requests.post(
        url,
        headers=headers,
        json={
            "inputs": text,
            "parameters": {
                "src_lang": src_lang,
                "tgt_lang": tgt_lang
            }
        }
    )
    if response.status_code == 200:
        result = response.json()
        if isinstance(result, list) and len(result) > 0:
            return result[0]['translation_text']
        return result['translation_text']
    else:
        print(f"Translation error: {response.status_code}, {response.text}")
        return text  # Return the original text if translation fails


class OpenRouterLLM:
    def __init__(self, key: str):
        try:
            self.client = OpenAI(
                base_url="https://openrouter.ai/api/v1",
                api_key=key
            )
            self.headers = {
                "HTTP-Referer": "http://localhost:3000",
                "X-Title": "Local Development"
            }
        except Exception as e:
            print(f"Initialization error: {e}")
            raise

    def stream(self, prompt: str) -> Iterator[str]:
        try:
            completion = self.client.chat.completions.create(
                model="deepseek/deepseek-r1-distill-llama-70b:free",
                # model="meta-llama/llama-3.3-70b-instruct:free",
                # model="qwen/qwq-32b:free",
                messages=[{"role": "user", "content": prompt}],
                stream=True
            )
            for chunk in completion:
                delta = chunk.choices[0].delta
                if hasattr(delta, "content") and delta.content:
                    yield delta.content
        except Exception as e:
            yield f"Streaming error: {str(e)}"
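
# Illustrative usage sketch (assumes the 'V1' environment variable above holds a valid OpenRouter API key):
# llm = OpenRouterLLM(api)
# for token in llm.stream("Say hello in one short sentence."):
#     print(token, end="")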
class UserSession:
    def __init__(self, llm: OpenRouterLLM):  # Accept an instance of OpenRouterLLM
        self.current_user = None
        self.welcome_message = None
        self.conversation_history = []  # Conversation history storage
        self.llm = llm  # Store the LLM instance

    def set_user(self, user_info):
        self.current_user = user_info
        self.set_welcome_message(user_info.get("Nickname", "Guest"))
        # Initialize the conversation history with the welcome message
        welcome = self.get_welcome_message()
        self.conversation_history = [
            {"role": "assistant", "content": welcome},
        ]

    def get_user(self):
        return self.current_user

    def set_welcome_message(self, Nickname, src_lang="eng_Latn", tgt_lang="kin_Latn"):
        """Set a dynamic welcome message using the OpenRouterLLM."""
        prompt = (
            f"Create a very brief welcome message for {Nickname}. "
            f"The message should: "
            f"1. Welcome {Nickname} warmly and professionally. "
            f"2. Emphasize that this is a safe and trusted space. "
            f"3. Highlight specialized support for gender-based violence (GBV) and legal assistance. "
            f"4. Use a tone that is warm, reassuring, and professional. "
            f"5. Keep the message concise and impactful."
        )

        # Use the OpenRouterLLM to generate the message
        welcome = "".join(self.llm.stream(prompt))  # Stream and concatenate the response
        welcome_text = translate_text(welcome, src_lang, tgt_lang)

        # Format the message with HTML styling
        self.welcome_message = (
            f"