import spaces
import json
import subprocess
import os
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider
from llama_cpp_agent.chat_history import BasicChatHistory
from llama_cpp_agent.chat_history.messages import Roles
import gradio as gr
from huggingface_hub import hf_hub_download
import tempfile
from typing import List, Tuple, Optional

# Conditional import of the PDF-processing libraries.
# DOCLING_AVAILABLE records whether the Docling converter could be imported.
try:
    from docling.document_converter import DocumentConverter
    DOCLING_AVAILABLE = True
except ImportError:
    DOCLING_AVAILABLE = False
    print("Docling not available, using alternative PDF processing")
    # Try the alternative PDF libraries; if they are missing too, this is only
    # reported with a warning and the module keeps loading.
    try:
        import PyPDF2
        import pdfplumber
    except ImportError:
        print("Warning: PDF processing libraries not fully installed")

# Read HF_TOKEN from the environment (None when the variable is not set)
HF_TOKEN = os.getenv("HF_TOKEN")

# Initialize the module-level state (important!)
llm = None  # Llama instance; starts empty
llm_model = None  # Name of the model held in `llm`; starts empty
document_context = ""  # Document text extracted from the uploaded PDF
document_filename = ""  # File name of the currently loaded document

print("전역 변수 초기화 완료")
print(f"document_context 초기값: '{document_context}'")
print(f"document_filename 초기값: '{document_filename}'")

# File name of the GGUF model
MISTRAL_MODEL_NAME = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"

# Download the model (authenticated with HF_TOKEN).
# This runs at import time, before any of the code further down the module.
model_path = hf_hub_download(
    repo_id="ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503",
    filename=MISTRAL_MODEL_NAME,
    local_dir="./models",
    token=HF_TOKEN
)

print(f"Downloaded model path: {model_path}")

# Custom stylesheet handed to gr.Blocks(css=css): chat bubble spacing and
# alignment, dark-mode colours, and the dashed upload container.
css = """
.bubble-wrap {
    padding-top: calc(var(--spacing-xl) * 3) !important;
}
.message-row {
    justify-content: space-evenly !important;
    width: 100% !important;
    max-width: 100% !important;
    margin: calc(var(--spacing-xl)) 0 !important;
    padding: 0 calc(var(--spacing-xl) * 3) !important;
}
.flex-wrap.user {
    border-bottom-right-radius: var(--radius-lg) !important;
}
.flex-wrap.bot {
    border-bottom-left-radius: var(--radius-lg) !important;
}
.message.user{
    padding: 10px;
}
.message.bot{
    text-align: right;
    width: 100%;
    padding: 10px;
    border-radius: 10px;
}
.message-bubble-border {
    border-radius: 6px !important;
}
.message-buttons {
    justify-content: flex-end !important;
}
.message-buttons-left {
    align-self: end !important;
}
.message-buttons-bot, .message-buttons-user {
    right: 10px !important;
    left: auto !important;
    bottom: 2px !important;
}
.dark.message-bubble-border {
    border-color: #343140 !important;
}
.dark.user {
    background: #1e1c26 !important;
}
.dark.assistant.dark, .dark.pending.dark {
    background: #16141c !important;
}
.upload-container {
    margin-bottom: 20px;
    padding: 15px;
    border: 2px dashed #666;
    border-radius: 10px;
    background-color: #f0f0f0;
}
.dark .upload-container {
    background-color: #292733;
    border-color: #444;
}
"""

def get_messages_formatter_type(model_name):
    """Return the chat-prompt formatter to use for *model_name*.

    Names containing "Mistral" or "BitSix" map to the MISTRAL formatter;
    any other name raises ValueError.
    """
    supported_markers = ("Mistral", "BitSix")
    if not any(marker in model_name for marker in supported_markers):
        raise ValueError(f"Unsupported model: {model_name}")
    # The MISTRAL format is used here instead of CHATML.
    return MessagesFormatterType.MISTRAL

def _pdf_file_to_text(pdf_path):
    """Convert the PDF at *pdf_path* to text, preferring Docling's Markdown export."""
    if DOCLING_AVAILABLE:
        result = DocumentConverter().convert(pdf_path)
        return result.document.export_to_markdown()
    # Docling could not be imported: fall back to plain-text extraction with
    # pdfplumber. extract_text() may return None for pages without text.
    with pdfplumber.open(pdf_path) as pdf:
        return "\n\n".join(page.extract_text() or "" for page in pdf.pages)


@spaces.GPU
def convert_pdf_to_markdown(file):
    """Convert an uploaded PDF to Markdown and store it as the active document.

    Args:
        file: The uploaded file, either a filesystem path (str / os.PathLike)
            or an object exposing the path through a ``name`` attribute.
            ``None`` means nothing was uploaded.

    Returns:
        A ``(text, metadata)`` tuple. On success ``text`` is the converted
        document and ``metadata`` holds ``filename``, ``conversion_status``,
        ``content_length`` and ``preview``. On failure ``text`` is an error
        message and ``metadata`` is ``{"error": <message>}``. For ``None``
        input the metadata dict is empty.

    Side effects:
        Updates the module globals ``document_context`` and
        ``document_filename`` (cleared again when the conversion fails).
        NOTE(review): if the @spaces.GPU wrapper executes this function in a
        separate process, these assignments may not be visible to the caller;
        callers should rely on the return value - confirm.
    """
    global document_context, document_filename

    if file is None:
        return "파일이 업로드되지 않았습니다.", {}

    try:
        # Accept plain paths as well as file-like upload objects with `.name`.
        if isinstance(file, (str, os.PathLike)):
            pdf_path = os.fspath(file)
        else:
            pdf_path = file.name

        print(f"\n=== PDF 변환 시작 ===")
        print(f"파일 경로: {pdf_path}")

        markdown_content = _pdf_file_to_text(pdf_path)

        # Publish the converted document (important: the chat reads these).
        document_context = markdown_content
        document_filename = os.path.basename(pdf_path)

        if len(markdown_content) > 500:
            preview = markdown_content[:500] + "..."
        else:
            preview = markdown_content
        metadata = {
            "filename": document_filename,
            "conversion_status": "success",
            "content_length": len(markdown_content),
            "preview": preview,
        }

        print(f"✅ PDF 변환 성공!")
        print(f"📄 파일명: {document_filename}")
        print(f"📏 문서 길이: {len(markdown_content)} 문자")
        print(f"📝 문서 시작 300자:\n{markdown_content[:300]}...")
        print(f"=== PDF 변환 완료 ===\n")

        return markdown_content, metadata

    except Exception as e:
        # UI boundary: report the failure as text instead of raising, and
        # make sure no half-loaded document stays active.
        error_msg = f"PDF 변환 중 오류 발생: {str(e)}"
        print(f"❌ {error_msg}")
        document_context = ""
        document_filename = ""
        return error_msg, {"error": str(e)}

def find_relevant_chunks(document, query, chunk_size=1500, overlap=300,
                         top_k=2, fallback_chars=2000):
    """Return the parts of *document* that best match *query* by keyword overlap.

    The document is cut into windows of ``chunk_size`` characters that overlap
    by ``overlap`` characters. Each window is scored by how many distinct
    query words (case-insensitive substring match) it contains, and the
    ``top_k`` best windows are returned with a small header each.

    Args:
        document: Full document text. Empty / None yields ``""``.
        query: The user's question. None is treated as an empty query.
        chunk_size: Window length in characters (must be positive).
        overlap: Characters shared by consecutive windows (must be smaller
            than ``chunk_size``).
        top_k: Maximum number of windows to return.
        fallback_chars: Length of the document prefix returned when no window
            matches any query word.

    Returns:
        The formatted matching windows, or the first ``fallback_chars``
        characters of the document when nothing matched.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is not
            smaller than ``chunk_size`` (the window could never advance).
    """
    if not document:
        return ""

    step = chunk_size - overlap
    if chunk_size <= 0 or step <= 0:
        raise ValueError(
            f"chunk_size must be positive and larger than overlap "
            f"(got chunk_size={chunk_size}, overlap={overlap})"
        )

    print(f"관련 청크 찾기 시작 - 쿼리: {query}")

    # Simple keyword search. Each distinct word counts once so that repeating
    # a word in the question does not inflate the score.
    query_words = set((query or "").lower().split())

    # Split the document into overlapping windows.
    chunks = []
    for start in range(0, len(document), step):
        chunks.append((start, document[start:start + chunk_size]))
        # Once a window reaches the end of the document, any further window
        # would lie entirely inside it and only duplicate text.
        if start + chunk_size >= len(document):
            break

    print(f"총 {len(chunks)}개의 청크로 분할됨")

    # Score every window and keep only those that match at least one word.
    scored_chunks = []
    for start, chunk in chunks:
        chunk_lower = chunk.lower()
        score = sum(1 for word in query_words if word in chunk_lower)
        if score > 0:
            scored_chunks.append((score, start, chunk))

    # Keep the best windows; the sort is stable, so ties stay in document order.
    scored_chunks.sort(reverse=True, key=lambda item: item[0])
    relevant_chunks = scored_chunks[:top_k]

    if not relevant_chunks:
        # Nothing matched: fall back to the beginning of the document.
        print("관련 청크를 찾지 못함, 문서 시작 부분 반환")
        return document[:fallback_chars]

    print(f"{len(relevant_chunks)}개의 관련 청크 찾음")
    return "".join(
        f"\n[문서의 {start}번째 위치에서 발췌 - 관련도: {score}]\n{chunk}\n"
        for score, start, chunk in relevant_chunks
    )

def _completed_turns(history):
    """Normalize *history* into ``(user, assistant)`` pairs of finished turns.

    Accepted entry layouts: ``[user, assistant]`` pairs, dicts keyed
    ``"user"`` / ``"assistant"``, and ``{"role", "content"}`` message dicts.
    Turns that have no assistant reply yet (such as the message currently
    being answered) are left out.
    """
    turns = []
    awaiting_reply = None  # user text of a role/content message not yet answered
    for entry in history or []:
        if isinstance(entry, dict) and "role" in entry:
            role, content = entry.get("role"), entry.get("content")
            if role == "user":
                awaiting_reply = content
            elif role == "assistant" and awaiting_reply is not None and content is not None:
                turns.append((awaiting_reply, content))
                awaiting_reply = None
            continue
        if isinstance(entry, dict):
            user_text, assistant_text = entry.get("user"), entry.get("assistant")
        else:
            user_text, assistant_text = entry[0], entry[1]
        if user_text is not None and assistant_text is not None:
            turns.append((user_text, assistant_text))
    return turns


@spaces.GPU(duration=120)
def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
):
    """Stream the model's answer to *message*, grounded in the loaded PDF if any.

    Args:
        message: The user's current question.
        history: Earlier conversation turns; see ``_completed_turns`` for the
            accepted layouts. Unanswered turns are ignored.
        system_message: Base system prompt configured in the UI.
        max_tokens, temperature, top_p, top_k, repeat_penalty: Sampling
            settings copied onto the provider's default settings.

    Yields:
        The accumulated response text after each streamed piece. If
        generation fails, a single apology message is yielded instead.

    Side effects:
        Creates the Llama instance on first use and keeps it in the module
        globals ``llm`` / ``llm_model``.
    """
    global llm, llm_model

    # Read the document state through globals() so a missing name yields "".
    document_context = globals().get('document_context', '')
    document_filename = globals().get('document_filename', '')

    # Detailed log for debugging
    print(f"\n=== RESPOND 함수 시작 ===")
    print(f"사용자 메시지: {message}")
    print(f"문서 컨텍스트 존재 여부: {bool(document_context)}")
    if document_context:
        print(f"문서 길이: {len(document_context)}")
        print(f"문서 파일명: {document_filename}")
        print(f"문서 시작 100자: {document_context[:100]}...")
    else:
        print("⚠️ document_context가 비어있습니다!")
        print(f"globals()의 키들: {list(globals().keys())[:20]}...")  # first 20 keys only

    chat_template = get_messages_formatter_type(MISTRAL_MODEL_NAME)

    # Location of the model file on disk
    model_path_local = os.path.join("./models", MISTRAL_MODEL_NAME)

    if llm is None or llm_model != MISTRAL_MODEL_NAME:
        print("LLM 모델 로딩 중...")
        llm = Llama(
            model_path=model_path_local,
            flash_attn=True,
            n_gpu_layers=81,
            n_batch=1024,
            n_ctx=16384,  # context size
            verbose=True  # verbose log for debugging
        )
        llm_model = MISTRAL_MODEL_NAME
        print("LLM 모델 로딩 완료!")

    provider = LlamaCppPythonProvider(llm)

    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True

    if document_context and len(document_context) > 0:
        print(f"📄 문서 컨텍스트를 메시지에 포함합니다: {len(document_context)} 문자")

        # The document goes into the system prompt (first 3000 characters
        # only); the user message itself is sent unchanged. The former
        # "enhanced" user message and its dump to debug_last_message.txt were
        # dropped because that text was never sent to the model.
        korean_system_message = system_message + f"\n\n현재 '{document_filename}' PDF 문서가 로드되어 있습니다. 사용자의 모든 질문에 대해 이 문서의 내용을 반드시 참조하여 답변하세요."
        doc_snippet = document_context[:3000]
        truncation_note = '' if len(document_context) <= 3000 else '... (이하 생략)'
        enhanced_system_prompt = f"""{korean_system_message}

현재 로드된 PDF 문서:
파일명: {document_filename}
문서 내용:
{doc_snippet}
{truncation_note}

위 문서의 내용을 바탕으로 사용자의 질문에 답변하세요."""
        final_message = message
    else:
        # No document loaded: use the configured system prompt as-is.
        enhanced_system_prompt = system_message
        final_message = message
        if any(keyword in message.lower() for keyword in ["문서", "pdf", "업로드", "파일", "내용", "요약"]):
            # The question seems to be about a document; tell the model none exists.
            final_message = f"{message}\n\n[시스템 메시지: 현재 업로드된 PDF 문서가 없습니다. PDF 파일을 먼저 업로드해주세요.]"
            print("문서 관련 질문이지만 문서가 없음")

        # Debug output
        print("⚠️ 경고: document_context가 비어있습니다!")
        print(f"document_context 타입: {type(document_context)}")
        print(f"document_context 값: {repr(document_context)}")
        print(f"document_filename: {document_filename}")

    agent = LlamaCppAgent(
        provider,
        system_prompt=enhanced_system_prompt,
        predefined_messages_formatter_type=chat_template,
        debug_output=True
    )

    # Replay every finished earlier turn. Indexing the entries positionally
    # failed for dict-shaped history and always skipped the last earlier turn.
    messages = BasicChatHistory()
    for user_text, assistant_text in _completed_turns(history):
        messages.add_message({
            'role': Roles.user,
            'content': user_text
        })
        messages.add_message({
            'role': Roles.assistant,
            'content': assistant_text
        })

    print(f"최종 메시지 전송 중: {final_message}")

    # Generate the streamed response
    try:
        stream = agent.get_chat_response(
            final_message,
            llm_sampling_settings=settings,
            chat_history=messages,
            returns_streaming_generator=True,
            print_output=False
        )

        outputs = ""
        for output in stream:
            outputs += output
            yield outputs
    except Exception as e:
        # UI boundary: surface a friendly message rather than a traceback.
        print(f"스트림 생성 중 오류: {e}")
        yield "죄송합니다. 응답 생성 중 오류가 발생했습니다. 다시 시도해주세요."

def clear_document_context():
    """Forget the loaded document and return the status text shown in the UI."""
    global document_context, document_filename
    document_context = document_filename = ""
    cleared_notice = "📭 문서 컨텍스트가 초기화되었습니다. 새로운 PDF를 업로드해주세요."
    return cleared_notice

def check_document_status():
    """Log the current document state and return a status text for the UI.

    Returns a "document loaded" summary (file name and length) when a
    document is stored, otherwise a prompt to upload a PDF.
    """
    context_length = len(document_context) if document_context else 0
    print(f"\n=== 문서 상태 확인 ===")
    print(f"document_context 타입: {type(document_context)}")
    print(f"document_context 길이: {context_length}")
    print(f"document_filename: '{document_filename}'")

    # Guard clause: nothing loaded yet.
    if not document_context or len(document_context) <= 0:
        return "📭 로드된 문서가 없습니다. PDF 파일을 업로드해주세요."

    loaded_summary = (
        "✅ 문서가 로드되어 있습니다.\n"
        f"📄 파일명: {document_filename}\n"
        f"📏 문서 길이: {len(document_context):,} 문자"
    )
    print(f"문서 첫 100자: {document_context[:100]}")
    return loaded_summary

# Build the Gradio interface.
# Components are created in the order they are written here, so the order of
# the statements below is the on-screen order of the sections.
with gr.Blocks(theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="cyan",
        neutral_hue="gray",
        font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]
    ).set(
        body_background_fill="#f8f9fa",
        block_background_fill="#ffffff",
        block_border_width="1px",
        block_title_background_fill="#e9ecef",
        input_background_fill="#ffffff",
        button_secondary_background_fill="#e9ecef",
        border_color_accent="#dee2e6",
        border_color_primary="#ced4da",
        background_fill_secondary="#f8f9fa",
        color_accent_soft="transparent",
        code_background_fill="#f1f3f5",
    ), css=css) as demo:
    
    gr.Markdown("# 온프레미스 최적화 'LLM+RAG 모델' 서비스 by VIDraft")
    gr.Markdown("📄 PDF 문서를 업로드하면 AI가 문서 내용을 분석하여 질문에 답변합니다.")
    gr.Markdown("💡 사용법: 1) 아래에서 PDF 업로드 → 2) 문서에 대한 질문 입력 → 3) AI가 한국어로 답변")
    
    # Chat interface goes at the top
    with gr.Row():
        with gr.Column():
            # Chat widgets: conversation view, message box, and action buttons
            chatbot = gr.Chatbot(elem_id="chatbot", height=500)
            msg = gr.Textbox(
                label="메시지 입력",
                placeholder="질문을 입력하세요... (PDF를 업로드하면 문서 내용에 대해 질문할 수 있습니다)",
                lines=2
            )
            with gr.Row():
                submit = gr.Button("전송", variant="primary")
                clear_chat = gr.Button("대화 초기화")
    
    # Example questions go in the middle; clicking one fills the message box
    gr.Examples(
        examples=[
            ["이 문서는 무엇에 관한 내용인가요?"],
            ["업로드한 PDF 문서의 주요 내용을 한국어로 요약해주세요."],
            ["문서에 나온 일정을 알려주세요."],
            ["문서에서 가장 중요한 3가지 핵심 포인트는 무엇인가요?"],
            ["이 행사의 개요를 설명해주세요."]
        ],
        inputs=msg
    )
    
    # PDF upload section goes below the examples
    with gr.Accordion("📄 PDF 문서 업로드", open=True):
        with gr.Row():
            with gr.Column(scale=1):
                file_input = gr.File(
                    label="PDF 문서 선택",
                    file_types=[".pdf"],
                    type="filepath"
                )
                with gr.Row():
                    convert_button = gr.Button("문서 변환", variant="primary")
                    clear_button = gr.Button("문서 초기화", variant="secondary")
                    test_button = gr.Button("문서 테스트", variant="secondary")
                
                # The initial value is computed once, while the UI is built.
                status_text = gr.Textbox(
                    label="문서 상태", 
                    interactive=False,
                    value=check_document_status(),
                    lines=3
                )
            
            with gr.Column(scale=1):
                with gr.Accordion("변환된 문서 미리보기", open=False):
                    converted_text = gr.Textbox(
                        label="Markdown 변환 결과",
                        lines=10,
                        max_lines=20,
                        interactive=False
                    )
                    metadata_output = gr.JSON(label="메타데이터")
    
    # Advanced settings go at the very bottom
    with gr.Accordion("⚙️ 고급 설정", open=False):
        system_message = gr.Textbox(
            value="당신은 한국어로 답변하는 AI 어시스턴트입니다. PDF 문서가 제공되면 그 내용을 정확히 분석하여 답변합니다.",
            label="시스템 메시지",
            lines=3
        )
        max_tokens = gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="최대 토큰 수")
        temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.3, step=0.1, label="Temperature (낮을수록 일관성 있음)")
        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.90, step=0.05, label="Top-p")
        top_k = gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k")
        repeat_penalty = gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty")
    # Event handlers
    def user_submit(message, history):
        """Clear the input box and append *message* as a turn awaiting its reply."""
        pending_turn = [message, None]
        cleared_input = ""
        return cleared_input, history + [pending_turn]
    
    def bot_response(history, system_msg, max_tok, temp, top_p_val, top_k_val, rep_pen):
        """Stream the assistant's reply into the newest, still unanswered chat turn.

        Args:
            history: Chat history as ``[user, assistant]`` pairs; the last
                pair must have ``None`` as its assistant part.
            system_msg, max_tok, temp, top_p_val, top_k_val, rep_pen: Values
                of the advanced-settings widgets, forwarded to ``respond``.

        Yields:
            ``history`` with the last turn's reply updated after every
            streamed piece. Nothing is yielded when there is no pending turn.
        """
        # Nothing to do unless the last turn is still waiting for an answer.
        if not history or history[-1][1] is not None:
            return

        user_message = history[-1][0]

        # Debugging: report the document context state
        print(f"\n=== BOT RESPONSE 시작 ===")
        print(f"사용자 메시지: {user_message}")
        if document_context:
            print(f"📄 문서 컨텍스트 활성: {document_filename} ({len(document_context)} 문자)")
            print(f"문서 첫 200자: {document_context[:200]}...")
        else:
            print("📭 문서 컨텍스트 없음")

        # Hand respond() a snapshot of the history as [user, assistant] pairs,
        # pending turn included. respond() reads entries by position and skips
        # turns without a reply; dict entries keyed "user"/"assistant" cannot
        # be read that way. The copy keeps the streaming updates below from
        # leaking into the history respond() received.
        history_snapshot = [list(turn) for turn in history]
        completed_turns = sum(1 for turn in history_snapshot[:-1] if turn[1] is not None)
        print(f"이전 대화 수: {completed_turns}")

        if document_context and len(document_context) > 0:
            print(f"📄 문서 기반 응답 생성 중... (문서 길이: {len(document_context)})")

        try:
            for partial_reply in respond(
                user_message,
                history_snapshot,
                system_msg,
                max_tok,
                temp,
                top_p_val,
                top_k_val,
                rep_pen
            ):
                history[-1][1] = partial_reply
                yield history
        except Exception as e:
            # UI boundary: log the traceback and show a friendly message.
            print(f"❌ 응답 생성 중 오류: {e}")
            import traceback
            traceback.print_exc()
            history[-1][1] = "죄송합니다. 응답 생성 중 오류가 발생했습니다. 다시 시도해주세요."
            yield history
    
    # PDF conversion event
    def on_pdf_convert(file):
        """Convert the selected PDF and return (text, metadata, status) for the UI."""
        global document_context, document_filename

        if file is None:
            return "", {}, "❌ 파일이 선택되지 않았습니다."

        markdown_content, metadata = convert_pdf_to_markdown(file)

        # Failure: show the error and leave the stored document untouched here.
        if "error" in metadata:
            return markdown_content, metadata, f"❌ 변환 실패: {metadata['error']}"

        # Store the result in the module globals once more from this side.
        # NOTE(review): presumably needed because convert_pdf_to_markdown is
        # wrapped by @spaces.GPU and its own assignments may not reach this
        # process - confirm.
        document_context = markdown_content
        document_filename = metadata['filename']

        status = (
            "✅ PDF 문서가 성공적으로 변환되었습니다!\n"
            f"📄 파일명: {metadata['filename']}\n"
            f"📏 문서 길이: {metadata['content_length']:,} 문자\n\n"
            "이제 문서 내용에 대해 한국어로 질문하실 수 있습니다.\n\n"
            "예시 질문:\n"
            "- 이 문서의 주요 내용을 요약해주세요\n"
            "- 문서에 나온 핵심 개념을 설명해주세요"
        )

        print(f"\n✅ 문서 로드 완료 확인:")
        print(f"- globals()['document_context'] 길이: {len(document_context)}")
        print(f"- globals()['document_filename']: {document_filename}")

        # Final check that the document really is stored.
        stored_ok = len(document_context) > 0
        if stored_ok:
            print("✅ 문서가 성공적으로 전역 변수에 저장되었습니다!")
        else:
            print("❌ 경고: 문서가 전역 변수에 저장되지 않았습니다!")

        return markdown_content, metadata, status
    
    # Run the conversion automatically when a file is uploaded, and also when
    # the manual convert button is clicked. Both use the same handler and
    # update the same three widgets.
    for register_trigger in (file_input.change, convert_button.click):
        register_trigger(
            fn=on_pdf_convert,
            inputs=[file_input],
            outputs=[converted_text, metadata_output, status_text],
        )
    
    # Document test function
    def test_document():
        """Return a short report about the currently loaded document."""
        # Guard clause: no document stored.
        if not document_context:
            return "❌ 현재 로드된 문서가 없습니다."

        report_lines = [
            "✅ 문서 테스트 결과:",
            f"📄 파일명: {document_filename}",
            f"📏 전체 길이: {len(document_context):,} 문자",
            f"📝 첫 500자:\n{document_context[:500]}...",
        ]
        return "\n".join(report_lines)
    
    # Document test button: show the report in the status box.
    test_button.click(fn=test_document, outputs=[status_text])

    # Document reset button: clear the stored document, then empty the
    # preview widgets and refresh the status text.
    clear_button.click(fn=clear_document_context, outputs=[status_text]).then(
        fn=lambda: ("", {}, check_document_status()),
        outputs=[converted_text, metadata_output, status_text],
    )

    # Chat events: pressing Enter in the message box and clicking the send
    # button both append the user turn first and then stream the reply.
    for register_trigger in (msg.submit, submit.click):
        register_trigger(user_submit, [msg, chatbot], [msg, chatbot]).then(
            bot_response,
            [chatbot, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty],
            chatbot,
        )

    # Reset the conversation view.
    clear_chat.click(lambda: [], None, chatbot)

if __name__ == "__main__":
    # Create the required directory
    os.makedirs("./models", exist_ok=True)

    # Check the environment: warn when no Hugging Face token is configured
    if not HF_TOKEN:
        missing_token_notice = (
            "⚠️  경고: HF_TOKEN이 설정되지 않았습니다. 모델 다운로드에 제한이 있을 수 있습니다.",
            "환경 변수를 설정하려면: export HF_TOKEN='your_huggingface_token'",
        )
        for notice_line in missing_token_notice:
            print(notice_line)

    launch_options = {
        "server_name": "0.0.0.0",  # reachable from the local network
        "server_port": 7860,
        "share": False,  # on-premise deployment, so public sharing stays off
    }
    demo.launch(**launch_options)