# Hugging Face Space: haepada/roots (status: Sleeping) - file size: 9,057 bytes

import gradio as gr
import numpy as np
import librosa
from transformers import pipeline
from datetime import datetime
import os

# Initialise the AI models once, at import time; the event handlers defined in
# create_interface() use these module-level pipeline objects.
# NOTE(review): the checkpoint bound to `emotion_classifier` is named
# "speech-commands", which suggests a keyword classifier rather than an
# emotion model - confirm that its labels are what the prompt builder expects.
speech_recognizer = pipeline(
    task="automatic-speech-recognition",
    model="kresnik/wav2vec2-large-xlsr-korean",
)
emotion_classifier = pipeline(
    task="audio-classification",
    model="MIT/ast-finetuned-speech-commands-v2",
)
text_analyzer = pipeline(
    task="sentiment-analysis",
    model="nlptown/bert-base-multilingual-uncased-sentiment",
)

def create_interface():
    """Build the Gradio Blocks application and wire up its event handlers.

    The layout is a user-name banner plus four tabs (ids ``intro``,
    ``cleansing``, ``prayer`` and ``sharing``). Three handlers are attached:

    * the start button stores the visitor's name in the session state and
      switches to the ``cleansing`` tab;
    * the save button appends a time-stamped, sentiment-labelled reflection
      to the session state and refreshes the reflections table;
    * a change of the microphone recording runs the voice analysis and fills
      the analysis widgets and the generated-prompt textbox.

    Relies on the module-level ``speech_recognizer``, ``emotion_classifier``
    and ``text_analyzer`` pipelines.

    Returns:
        gr.Blocks: The assembled application; the caller is expected to call
        ``launch()`` on it.
    """
    # A bare NumPy array handed to a transformers audio pipeline is assumed to
    # already be at the feature extractor's sampling rate, so audio is
    # resampled to this rate on load instead of librosa's 22 050 Hz default.
    # NOTE(review): 16 kHz matches the wav2vec2/AST checkpoints per their
    # model cards - re-check if the checkpoints are swapped.
    model_sample_rate = 16000

    with gr.Blocks(theme=gr.themes.Soft()) as app:
        # Session state shared by all handlers.
        state = gr.State({
            "stage": "intro",
            "user_name": "",
            "reflections": [],
            "voice_analyses": [],
            "current_prompt": "",
            "generated_images": []
        })

        with gr.Column():
            # Banner showing the user's name.
            user_name_display = gr.Markdown("", elem_id="user-name-display")

            # One tab per stage.
            with gr.Tabs() as tabs:
                # 1. Entrance
                with gr.Tab("입장", id="intro"):
                    gr.Markdown("""
                    # 디지털 굿판에 오신 것을 환영합니다
                    온천천의 디지털 치유 공간으로 들어가보세요.
                    """)
                    name_input = gr.Textbox(label="이름을 알려주세요")
                    start_button = gr.Button("여정 시작하기")

                # 2. Cleansing
                with gr.Tab("청신", id="cleansing"):
                    gr.Markdown("## 청신 - 소리로 정화하기")
                    with gr.Row():
                        # Music player
                        audio_player = gr.Audio(
                            value=os.path.join(os.path.dirname(__file__), "assets", "main_music.mp3"),
                            type="filepath",
                            label="온천천의 소리",
                            interactive=True,
                            autoplay=True
                        )

                        # Reflection input
                        with gr.Column():
                            reflection_input = gr.Textbox(
                                label="현재 순간의 감상을 적어주세요",
                                lines=3
                            )
                            # Named *_button so the handler function
                            # `save_reflection` below cannot shadow it.
                            save_reflection_button = gr.Button("감상 저장")
                            reflections_display = gr.Dataframe(
                                headers=["시간", "감상", "감정"],
                                label="기록된 감상들"
                            )

                # 3. Prayer
                with gr.Tab("기원", id="prayer"):
                    gr.Markdown("## 기원 - 목소리로 전하기")
                    with gr.Row():
                        # Voice input
                        voice_input = gr.Audio(
                            label="나누고 싶은 이야기를 들려주세요",
                            sources=["microphone"],
                            type="filepath",
                            interactive=True
                        )

                        # Analysis results
                        with gr.Column():
                            analysis_output = gr.JSON(
                                label="음성 분석 결과"
                            )
                            text_output = gr.Textbox(
                                label="인식된 텍스트",
                                interactive=False
                            )
                            emotion_output = gr.Textbox(
                                label="감정 분석",
                                interactive=False
                            )

                # 4. Sharing
                with gr.Tab("송신", id="sharing"):
                    gr.Markdown("## 송신 - 함께 나누기")
                    with gr.Row():
                        # Generated prompt and image gallery
                        final_prompt = gr.Textbox(
                            label="생성된 프롬프트",
                            interactive=False
                        )
                        generated_gallery = gr.Gallery(
                            label="시각화 결과",
                            columns=2,
                            height="auto"
                        )

        # ------------------------------------------------------------------
        # Handlers
        # ------------------------------------------------------------------
        def start_journey(name, state):
            """Store the visitor's name and move on to the cleansing tab.

            Returns the updated state, the Markdown for the name banner and a
            tab-selection update.
            """
            # Tolerate a missing value and drop stray whitespace so the
            # greeting renders cleanly.
            name = (name or "").strip()
            state["user_name"] = name
            return (
                state,
                f"# 환영합니다, {name}님",
                gr.update(selected="cleansing")
            )

        def generate_art_prompt(analysis):
            """Build an art prompt from an analysis result.

            Reads ``voice_emotion.label``, ``acoustic_analysis.energy`` and
            ``text_sentiment.label`` from ``analysis`` and returns the prompt
            string.
            """
            emotion = analysis["voice_emotion"]["label"]
            energy = analysis["acoustic_analysis"]["energy"]
            text_sentiment = analysis["text_sentiment"]["label"]

            # Colour palette per emotion label; unknown labels fall back to a
            # generic description below.
            colors = {
                "happy": "따뜻한 노란색과 주황색",
                "sad": "차분한 파랑색과 보라색",
                "angry": "강렬한 빨강색과 검정색",
                "neutral": "부드러운 회색과 베이지색"
            }

            prompt = f"한국 전통 민화 스타일로, {emotion}의 감정을 {colors.get(emotion, '자연스러운 색상')}으로 표현한 추상화. "
            prompt += f"음성의 에너지({energy:.2f})를 채도로 표현하고, "
            prompt += f"텍스트의 감정({text_sentiment})을 구도에 반영."

            return prompt

        def analyze_voice(audio_path, state):
            """Run the full voice analysis on a recorded audio file.

            Args:
                audio_path: Path of the recording, or ``None`` when the audio
                    component is empty.
                state: The session state dict; ``current_prompt`` is updated
                    on success.

            Returns:
                A 5-tuple ``(state, analysis, recognised_text,
                emotion_summary, prompt)`` matching the outputs wired to
                ``voice_input.change``. On failure ``analysis`` is
                ``{"error": ...}``, the two text fields are empty and the
                previously stored prompt is shown unchanged.
            """
            if audio_path is None:
                return (
                    state,
                    {"error": "음성 입력이 없습니다."},
                    "",
                    "",
                    state["current_prompt"],
                )

            try:
                # Load and resample to the rate the models expect.
                y, sr = librosa.load(audio_path, sr=model_sample_rate)

                # librosa >= 0.10 moved tempo estimation to librosa.feature
                # and made its arguments keyword-only; fall back to the old
                # location on earlier releases.
                tempo_fn = getattr(librosa.feature, "tempo", None) or librosa.beat.tempo

                # Frame-wise fundamental-frequency estimate. pitch_tuning()
                # expects frequencies, not a waveform, so it cannot provide a
                # mean pitch. NOTE(review): YIN also reports a value for
                # unvoiced frames, so treat the mean as a rough indicator.
                f0 = librosa.yin(
                    y,
                    fmin=librosa.note_to_hz("C2"),
                    fmax=librosa.note_to_hz("C7"),
                    sr=sr,
                )

                # 1. Acoustic features
                acoustic_features = {
                    "energy": float(np.mean(librosa.feature.rms(y=y))),
                    "pitch_mean": float(np.mean(f0)),
                    "tempo": float(tempo_fn(y=y, sr=sr)[0]),
                    "mfcc": librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).mean(axis=1).tolist(),
                    "zero_crossing_rate": float(np.mean(librosa.feature.zero_crossing_rate(y)))
                }

                # 2. Emotion from the voice signal (first result is used)
                emotion_result = emotion_classifier(y)
                top_emotion = emotion_result[0]

                # 3. Speech-to-text
                text_result = speech_recognizer(y)
                recognized_text = text_result["text"]

                # 4. Sentiment of the recognised text
                text_sentiment = text_analyzer(recognized_text)[0]

                # Combine everything into one result
                analysis_result = {
                    "acoustic_analysis": acoustic_features,
                    "voice_emotion": top_emotion,
                    "text": recognized_text,
                    "text_sentiment": text_sentiment
                }

                # Build the prompt and remember it for the sharing tab.
                prompt = generate_art_prompt(analysis_result)
                state["current_prompt"] = prompt

                return (
                    state,
                    analysis_result,
                    recognized_text,
                    f"음성 감정: {top_emotion['label']} ({top_emotion['score']:.2f})\n"
                    f"텍스트 감정: {text_sentiment['label']} ({text_sentiment['score']:.2f})",
                    prompt,
                )

            except Exception as e:
                # UI boundary: surface the failure in the JSON panel instead
                # of crashing the event handler.
                return state, {"error": str(e)}, "", "", state["current_prompt"]

        def save_reflection(text, state):
            """Append a reflection with a timestamp and sentiment label.

            Blank input is ignored. Returns the state and the full list of
            reflections for the table.
            """
            if not (text or "").strip():
                return state, state["reflections"]

            current_time = datetime.now().strftime("%H:%M:%S")
            sentiment = text_analyzer(text)[0]
            new_reflection = [current_time, text, sentiment["label"]]

            state["reflections"].append(new_reflection)
            return state, state["reflections"]

        # ------------------------------------------------------------------
        # Event wiring
        # ------------------------------------------------------------------
        start_button.click(
            fn=start_journey,
            inputs=[name_input, state],
            outputs=[state, user_name_display, tabs]
        )

        save_reflection_button.click(
            fn=save_reflection,
            inputs=[reflection_input, state],
            outputs=[state, reflections_display]
        )

        voice_input.change(
            fn=analyze_voice,
            inputs=[voice_input, state],
            outputs=[state, analysis_output, text_output, emotion_output, final_prompt]
        )

    return app

if __name__ == "__main__":
    # Build the app and start serving it only when run as a script.
    create_interface().launch()