import gradio as gr
import numpy as np
import librosa
from transformers import pipeline
from datetime import datetime
import os
import requests

# Read the Hugging Face API token from the "roots" environment variable and
# fail at import time when it is missing or empty.
HF_API_TOKEN = os.getenv("roots")
if not HF_API_TOKEN:
    raise ValueError("roots token not found in environment variables")

# Inference API settings: the model endpoint and the bearer-token auth header
# used by generate_image_from_prompt.
API_URL = "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-xl-base-1.0"
headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}

# Initialize the AI models once, at module import.
# speech_recognizer: speech-to-text pipeline; text_analyzer: sentiment pipeline.
speech_recognizer = pipeline(
    "automatic-speech-recognition",
    model="kresnik/wav2vec2-large-xlsr-korean"
)
text_analyzer = pipeline(
    "sentiment-analysis",
    model="nlptown/bert-base-multilingual-uncased-sentiment"
)

def map_acoustic_to_emotion(features):
    """Translate acoustic measurements into an emotion summary.

    Args:
        features: Mapping with numeric "energy", "tempo", "pitch" and
            "volume" entries.

    Returns:
        A dict with the keys "primary" (emotion label), "intensity"
        (scaled energy, capped at 100), "confidence", "secondary"
        (always an empty string), "characteristics" (one-element list
        describing the voice) and "details" (display strings).
    """

    def grade(value, upper, lower, names):
        """Return the high / middle / low name for *value* (strict '>' tests)."""
        high, middle, low = names
        if value > upper:
            return high
        if value > lower:
            return middle
        return low

    # Scale the raw measurements; each is only capped from above.
    intensity = min(features["energy"] * 100, 100)
    rate = min(features["tempo"] / 200, 1)
    pitch = min(features["pitch"] * 2, 1)

    # Decide the primary emotion: the first matching rule wins, in the order
    # loud voice -> high pitch -> quiet voice -> everything else.
    if intensity > 70:
        certainty = intensity / 100
        if rate > 0.6:
            label, trait = "기쁨/열정", "빠르고 활기찬 말하기 패턴"
        else:
            label, trait = "분노/강조", "강한 음성 강도"
    elif pitch > 0.6:
        certainty = pitch
        if intensity > 50:
            label, trait = "놀람/흥분", "높은 음고와 강한 강세"
        else:
            label, trait = "관심/호기심", "음고 변화가 큼"
    elif intensity < 30:
        certainty = (30 - intensity) / 30
        if rate < 0.4:
            label, trait = "슬픔/우울", "느리고 약한 음성"
        else:
            label, trait = "피로/무기력", "낮은 에너지 레벨"
    else:
        certainty = 0.5
        if rate > 0.5:
            label, trait = "평온/안정", "균형잡힌 말하기 패턴"
        else:
            label, trait = "차분/진지", "안정적인 음성 특성"

    # Assemble the summary together with the human-readable breakdown.
    return {
        "primary": label,
        "intensity": intensity,
        "confidence": certainty,
        "secondary": "",
        "characteristics": [trait],
        "details": {
            "energy_level": f"{intensity:.1f}%",
            "speech_rate": grade(rate, 0.6, 0.4, ("빠름", "보통", "느림")),
            "pitch_variation": grade(pitch, 0.6, 0.3, ("높음", "보통", "낮음")),
            "voice_volume": grade(features["volume"], 0.7, 0.3, ("큼", "보통", "작음")),
        },
    }

def generate_image_from_prompt(prompt, timeout=120):
    """Request an image for *prompt* from the configured Inference API endpoint.

    Args:
        prompt: Text sent as the "inputs" field of the request. A falsy
            prompt returns immediately without any network call.
        timeout: Seconds handed to ``requests.post`` as its ``timeout``.
            Without one, a stalled connection would block the caller
            indefinitely.

    Returns:
        The raw response body (``response.content``) when the API answers
        with HTTP 200; ``None`` for an empty prompt, a non-200 status, or
        any exception raised while sending the request.
    """
    print(f"Generating image with prompt: {prompt}")
    if not prompt:
        print("No prompt provided")
        return None

    payload = {
        "inputs": prompt,
        "parameters": {
            "negative_prompt": "ugly, blurry, poor quality, distorted",
            "num_inference_steps": 30,
            "guidance_scale": 7.5,
        },
    }

    try:
        response = requests.post(
            API_URL,
            headers=headers,
            json=payload,
            timeout=timeout,
        )
    except Exception as e:
        # Deliberate best-effort boundary: callers only ever see bytes or None,
        # so network failures (including the new timeout) are reported and
        # swallowed here, exactly as before.
        print(f"Error generating image: {str(e)}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        print(f"Response: {response.text}")
        return None

    print("Image generated successfully")
    return response.content

def _sentiment_rating(text_sentiment):
    """Return the 1-5 star count from a sentiment result, else its raw score."""
    try:
        label = text_sentiment["label"]
    except (KeyError, TypeError, IndexError):
        label = None
    if isinstance(label, str):
        head = label.split()[:1]
        # Star-rating classifiers label their output like "4 stars"; the
        # leading digit is the rating on the 1-5 scale.
        if head and head[0] in ("1", "2", "3", "4", "5"):
            return head[0]
    return text_sentiment["score"]


def generate_detailed_prompt(text, emotions, text_sentiment):
    """Build the emotion-driven image prompt for an analysed utterance.

    Args:
        text: Transcribed utterance, embedded verbatim in the prompt.
        emotions: Mapping providing "primary" (emotion label), "intensity"
            (compared against 70 and 40) and "characteristics" (iterable
            of strings), e.g. the result of map_acoustic_to_emotion.
        text_sentiment: Sentiment result providing "score" and, optionally,
            "label". When the label starts with a digit 1-5 (e.g.
            "4 stars") that digit is shown as the "x/5" strength; otherwise
            the raw "score" is shown, as before.
            NOTE(review): for the star-rating sentiment model configured in
            this file, "score" is presumably the classifier confidence
            (0-1), not a 1-5 rating — confirm against the model card.

    Returns:
        The assembled prompt string.
    """
    emotion_colors = {
        "기쁨/열정": "밝은 노랑과 따뜻한 주황색",
        "분노/강조": "강렬한 빨강과 짙은 검정",
        "놀람/흥분": "선명한 파랑과 밝은 보라",
        "관심/호기심": "연한 하늘색과 민트색",
        "슬픔/우울": "어두운 파랑과 회색",
        "피로/무기력": "탁한 갈색과 짙은 회색",
        "평온/안정": "부드러운 초록과 베이지",
        "차분/진지": "차분한 남색과 깊은 보라",
    }

    # Visual style follows the emotion intensity.
    intensity = emotions["intensity"]
    if intensity > 70:
        visual_style = "역동적인 붓질과 강한 대비"
    elif intensity > 40:
        visual_style = "균형잡힌 구도와 중간 톤의 조화"
    else:
        visual_style = "부드러운 그라데이션과 차분한 톤"

    primary = emotions["primary"]
    # Unknown emotion labels fall back to a neutral palette description.
    palette = emotion_colors.get(primary, "자연스러운 색상")
    traits = ", ".join(emotions["characteristics"])
    rating = _sentiment_rating(text_sentiment)

    # Assemble the prompt from its four sentences.
    return "".join((
        f"한국 전통 민화 스타일의 추상화, {palette} 기반. ",
        f"{visual_style}로 표현된 {primary}의 감정. ",
        f"음성의 특징({traits})을 화면의 동적 요소로 표현. ",
        f"발화 내용 '{text}'에서 느껴지는 감정(강도: {rating}/5)을 은유적 이미지로 담아내기.",
    ))

def create_interface():
    """Build the Gradio Blocks application.

    Returns:
        The ``gr.Blocks`` instance; the caller is responsible for launching it.
    """
    with gr.Blocks(theme=gr.themes.Soft()) as app:
        # Session state carried between callbacks.
        state = gr.State({
            "user_name": "",
            "reflections": [],
            "voice_analysis": None,
            "final_prompt": ""
        })

        # Header
        header = gr.Markdown("# 디지털 굿판")
        user_display = gr.Markdown("")

        # TODO(review): the remaining layout code was elided in this snippet
        # ("same as the previous code, omitted"). The bare bracketed
        # placeholder that stood here was not valid Python; restore the real
        # layout code at this point.

        def analyze_voice(audio_path, state):
            """Analyze a recorded clip: acoustics, transcription, sentiment, prompt.

            Args:
                audio_path: Path of the recorded audio file, or ``None`` when
                    nothing has been recorded yet.
                state: Session state; returned unchanged.

            Returns:
                A 5-tuple ``(state, text, voice_result, text_result, prompt)``.
                When no audio is given or any step raises, the second element
                carries the user-facing message and the last three are empty
                strings.
            """
            if audio_path is None:
                return state, "음성을 먼저 녹음해주세요.", "", "", ""

            try:
                y, sr = librosa.load(audio_path, sr=16000)

                # Tempo estimator: pass the audio and its sampling rate by
                # keyword, because the clip is loaded at 16 kHz rather than
                # the estimator's default rate. Prefer librosa.feature.tempo
                # when this librosa version provides it, otherwise use
                # librosa.beat.tempo.
                estimate_tempo = getattr(librosa.feature, "tempo", None) or librosa.beat.tempo

                # Acoustic features
                acoustic_features = {
                    "energy": float(np.mean(librosa.feature.rms(y=y))),
                    "tempo": float(estimate_tempo(y=y, sr=sr)[0]),
                    # Mean zero-crossing rate is used as the "pitch" measure.
                    "pitch": float(np.mean(librosa.feature.zero_crossing_rate(y))),
                    "volume": float(np.mean(np.abs(y)))
                }

                # Emotion from acoustics
                emotions = map_acoustic_to_emotion(acoustic_features)

                # Speech recognition on the raw waveform loaded above.
                # NOTE(review): presumably 16 kHz is the rate the ASR model
                # expects — confirm against the model card.
                transcription = speech_recognizer(y)
                text = transcription["text"]

                # Text sentiment
                text_sentiment = text_analyzer(text)[0]

                # Result formatting
                voice_result = (
                    f"음성 감정: {emotions['primary']} "
                    f"(강도: {emotions['intensity']:.1f}%, 신뢰도: {emotions['confidence']:.2f})\n"
                    f"특징: {', '.join(emotions['characteristics'])}\n"
                    f"상세 분석:\n"
                    f"- 에너지 레벨: {emotions['details']['energy_level']}\n"
                    f"- 말하기 속도: {emotions['details']['speech_rate']}\n"
                    f"- 음높이 변화: {emotions['details']['pitch_variation']}\n"
                    f"- 음성 크기: {emotions['details']['voice_volume']}"
                )

                # The "(1-5)" figure is the star count carried by the label
                # (e.g. "4 stars"). Fall back to the raw score only when the
                # label does not start with a 1-5 digit.
                label_head = str(text_sentiment.get("label", "")).split()[:1]
                if label_head and label_head[0] in ("1", "2", "3", "4", "5"):
                    rating = label_head[0]
                else:
                    rating = text_sentiment["score"]
                text_result = f"텍스트 감정 분석 (1-5): {rating}"

                # Prompt generation
                prompt = generate_detailed_prompt(text, emotions, text_sentiment)

                return state, text, voice_result, text_result, prompt
            except Exception as e:
                # UI boundary: surface the failure as text instead of raising.
                return state, f"오류 발생: {str(e)}", "", "", ""

        # TODO(review): the event wiring was elided in this snippet ("same as
        # the previous code, omitted"); restore it here so analyze_voice is
        # actually connected to the UI.

        return app

# Script entry point: build the interface and launch it with debug=True.
if __name__ == "__main__":
    demo = create_interface()
    demo.launch(debug=True)