import os
import tempfile
from datetime import datetime

import gradio as gr
import librosa
import numpy as np
import requests
from transformers import pipeline

# Inference API configuration
API_URL = "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-xl-base-1.0"
# The access token is read from the HF_TOKEN environment variable so the secret
# does not have to live in the source file. The "hf_..." placeholder is kept as
# the fallback, so replacing it in place still works as before.
headers = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN', 'hf_...')}"}

# AI model initialisation: each pipeline is built once, at import time.

# Speech-to-text for the recorded voice input.
speech_recognizer = pipeline(task="automatic-speech-recognition", model="kresnik/wav2vec2-large-xlsr-korean")

# Audio classifier whose top label is shown as the "voice emotion".
# NOTE(review): the checkpoint name suggests a speech-commands model; confirm
# that its labels include the emotion names the prompt builder looks up.
emotion_classifier = pipeline(task="audio-classification", model="MIT/ast-finetuned-speech-commands-v2")

# Text sentiment classifier used for reflections and transcriptions.
text_analyzer = pipeline(task="sentiment-analysis", model="nlptown/bert-base-multilingual-uncased-sentiment")

# Seconds to wait for the Inference API before giving up on a request.
_REQUEST_TIMEOUT_SECONDS = 120

# Colour phrase inserted into the image prompt for each voice-emotion label.
_EMOTION_COLORS = {
    "happy": "따뜻한 노란색과 주황색",
    "sad": "깊은 파랑색과 보라색",
    "angry": "강렬한 빨강색과 검정색",
    "neutral": "부드러운 회색과 베이지색",
}

# Colour phrase used when the voice-emotion label is not in the table above.
_DEFAULT_COLOR = "자연스러운 색상"


def generate_image_from_prompt(prompt):
    """Generate an image through the HuggingFace Inference API.

    Args:
        prompt: Text prompt sent as the request's ``inputs`` field.

    Returns:
        The raw response body (binary image data) when the API answers with
        HTTP 200. ``None`` when the prompt is blank, the API answers with any
        other status, or the request itself fails.
    """
    if not prompt or not prompt.strip():
        # Nothing to draw yet, e.g. the voice analysis has not been run.
        return None

    payload = {
        "inputs": prompt,
        "parameters": {
            "negative_prompt": "ugly, blurry, poor quality, distorted",
            "num_inference_steps": 30,
            "guidance_scale": 7.5,
        },
    }
    try:
        # Without a timeout a stalled connection would block the UI forever.
        response = requests.post(
            API_URL,
            headers=headers,
            json=payload,
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as e:
        print(f"Error generating image: {e}")
        return None

    if response.status_code != 200:
        print(f"Error generating image: HTTP {response.status_code}")
        return None
    return response.content


def save_image_bytes(image_bytes):
    """Write raw image bytes to a temporary file and return its path.

    ``gr.Image`` cannot display raw ``bytes`` returned from a handler, but it
    can display a file path, so the API response is written to disk first.

    Args:
        image_bytes: Encoded image data, or ``None``/empty when there is none.

    Returns:
        Path of the newly written file, or ``None`` when there is no data.
        The file is not deleted here because it must outlive this call.
    """
    if not image_bytes:
        return None

    # Pick the extension from the PNG signature; anything else is assumed to
    # be JPEG -- TODO confirm the formats the endpoint can return.
    suffix = ".png" if image_bytes.startswith(b"\x89PNG") else ".jpg"
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        tmp.write(image_bytes)
    return tmp.name


def generate_image(prompt):
    """Click handler: generate an image and return a path ``gr.Image`` can show.

    Args:
        prompt: Text prompt taken from the generated-prompt textbox.

    Returns:
        Path to the generated image file, or ``None`` when generation failed.
    """
    return save_image_bytes(generate_image_from_prompt(prompt))


def clear_voice_input():
    """Reset the voice input component by returning ``None`` as its value."""
    return None


def generate_prompt(text, voice_emotion, text_sentiment):
    """Build the image prompt from the transcription and both analyses.

    Args:
        text: Transcribed speech, quoted at the end of the prompt.
        voice_emotion: Mapping with a ``"label"`` key (top audio class).
        text_sentiment: Mapping with a ``"label"`` key (text sentiment).

    Returns:
        The prompt string. The colour phrase comes from the voice label, with
        a generic fallback for labels that have no entry in the colour table.
    """
    color = _EMOTION_COLORS.get(voice_emotion['label'], _DEFAULT_COLOR)
    return (
        f"한국 전통 민화 스타일의 추상화, {color} 사용. "
        f"음성의 감정({voice_emotion['label']})과 텍스트의 감정({text_sentiment['label']})이 조화를 이루며, "
        f"음성의 특징을 반영한 동적인 구도. 발화 내용: '{text}'"
    )


def analyze_voice(audio_path, state):
    """Transcribe a recording, classify it, and build the image prompt.

    Args:
        audio_path: File path of the recording, or ``None`` if nothing was
            recorded.
        state: Session state; passed through unchanged.

    Returns:
        A 5-tuple ``(state, transcription, voice emotion text, text emotion
        text, prompt)``. When there is no recording, or any step raises, the
        second element carries a user-facing message and the rest are empty.
    """
    if audio_path is None:
        return state, "음성을 먼저 녹음해주세요.", "", "", ""

    try:
        # Load the recording resampled to 16 kHz.
        y, _ = librosa.load(audio_path, sr=16000)

        # Speech recognition
        transcription = speech_recognizer(y)
        text = transcription["text"]

        # Emotion analysis (audio and text)
        voice_emotions = emotion_classifier(y)
        text_sentiment = text_analyzer(text)[0]

        # Prompt generation
        prompt = generate_prompt(text, voice_emotions[0], text_sentiment)

        return (
            state,
            text,
            f"음성 감정: {voice_emotions[0]['label']} ({voice_emotions[0]['score']:.2f})",
            f"텍스트 감정: {text_sentiment['label']} ({text_sentiment['score']:.2f})",
            prompt,
        )
    except Exception as e:
        # UI boundary: surface the failure in the textbox instead of raising.
        return state, f"오류 발생: {str(e)}", "", "", ""


def save_reflection(text, state):
    """Append a timestamped, sentiment-tagged reflection to the session state.

    Args:
        text: Reflection typed by the user; blank or ``None`` input is ignored.
        state: Session state dict; its ``"reflections"`` list is created when
            missing and appended to in place.

    Returns:
        ``(state, reflections)`` where ``reflections`` is the list of
        ``[time, text, sentiment]`` rows shown in the table.
    """
    # Make sure the list exists before either return path reads it.
    reflections = state.setdefault("reflections", [])
    if not text or not text.strip():
        return state, reflections

    current_time = datetime.now().strftime("%H:%M:%S")
    sentiment = text_analyzer(text)[0]
    reflections.append(
        [current_time, text, f"{sentiment['label']} ({sentiment['score']:.2f})"]
    )
    return state, reflections


def create_interface():
    """Build the Gradio Blocks app with its four tabs and event wiring.

    Returns:
        The ``gr.Blocks`` application, ready for ``launch()``.
    """
    with gr.Blocks(theme=gr.themes.Soft()) as app:
        state = gr.State({
            "user_name": "",
            "reflections": [],
            "voice_analysis": None,
            "final_prompt": ""
        })

        # Header
        gr.Markdown("# 디지털 굿판")
        user_display = gr.Markdown("")

        # Each tab gets an explicit id so a handler can select it.
        with gr.Tabs() as tabs:
            # Entrance
            with gr.Tab("입장", id="입장"):
                gr.Markdown("""# 디지털 굿판에 오신 것을 환영합니다""")
                name_input = gr.Textbox(label="이름을 알려주세요")
                start_btn = gr.Button("여정 시작하기")

            # Cheongsin: background sound and written reflections
            with gr.Tab("청신", id="청신"):
                with gr.Row():
                    # Absolute path, resolved against the working directory.
                    audio_path = os.path.abspath(os.path.join("assets", "main_music.mp3"))
                    gr.Audio(
                        # Start empty rather than fail when the asset is missing.
                        value=audio_path if os.path.isfile(audio_path) else None,
                        type="filepath",
                        label="온천천의 소리",
                        interactive=False,
                        autoplay=True
                    )
                    with gr.Column():
                        reflection_input = gr.Textbox(
                            label="현재 순간의 감상을 적어주세요",
                            lines=3
                        )
                        save_btn = gr.Button("감상 저장하기")
                        reflections_display = gr.Dataframe(
                            headers=["시간", "감상", "감정 분석"],
                            label="기록된 감상들"
                        )

            # Giwon: voice recording and analysis
            with gr.Tab("기원", id="기원"):
                gr.Markdown("## 기원 - 목소리로 전하기")
                with gr.Row():
                    with gr.Column():
                        voice_input = gr.Audio(
                            label="나누고 싶은 이야기를 들려주세요",
                            sources=["microphone"],
                            type="filepath",
                            interactive=True
                        )
                        clear_btn = gr.Button("녹음 지우기")

                    with gr.Column():
                        transcribed_text = gr.Textbox(
                            label="인식된 텍스트",
                            interactive=False
                        )
                        voice_emotion = gr.Textbox(
                            label="음성 감정 분석",
                            interactive=False
                        )
                        text_emotion = gr.Textbox(
                            label="텍스트 감정 분석",
                            interactive=False
                        )
                        analyze_btn = gr.Button("분석하기")

            # Songsin: generated prompt and resulting image
            with gr.Tab("송신", id="송신"):
                gr.Markdown("## 송신 - 시각화 결과")
                with gr.Column():
                    final_prompt = gr.Textbox(
                        label="생성된 프롬프트",
                        interactive=False,
                        lines=3
                    )
                    generate_btn = gr.Button("이미지 생성하기")
                    result_image = gr.Image(
                        label="생성된 이미지",
                        type="pil"
                    )

        def welcome_user(name):
            """Show the greeting and switch to the "청신" tab."""
            return f"# 환영합니다, {name}님의 디지털 굿판", gr.update(selected="청신")

        # Event wiring; every component referenced below is defined above.
        start_btn.click(
            fn=welcome_user,
            inputs=[name_input],
            outputs=[user_display, tabs]
        )

        save_btn.click(
            fn=save_reflection,
            inputs=[reflection_input, state],
            outputs=[state, reflections_display]
        )

        clear_btn.click(
            fn=clear_voice_input,
            inputs=[],
            outputs=[voice_input]
        )

        analyze_btn.click(
            fn=analyze_voice,
            inputs=[voice_input, state],
            outputs=[state, transcribed_text, voice_emotion, text_emotion, final_prompt]
        )

        generate_btn.click(
            fn=generate_image,
            inputs=[final_prompt],
            outputs=[result_image]
        )

    return app

if __name__ == "__main__":
    # Script entry point: build the Blocks app, then start serving it.
    create_interface().launch()