Spaces:

haepada
/

roots

Sleeping

App Files Community

haepada committed on Nov 5, 2024

Commit

7469d14

verified ·

1 Parent(s): 638a6b2

Update app.py

Browse files

Files changed (1) hide show

app.py +115 -185

app.py CHANGED Viewed

@@ -7,7 +7,7 @@ import os
 import requests
 # 환경변수에서 토큰 가져오기
-HF_API_TOKEN = os.getenv("roots")  # 변경된 부분
 if not HF_API_TOKEN:
     raise ValueError("roots token not found in environment variables")
@@ -20,77 +20,80 @@ speech_recognizer = pipeline(
     "automatic-speech-recognition",
     model="kresnik/wav2vec2-large-xlsr-korean"
 )
-emotion_classifier = pipeline(
-    "audio-classification",
-    model="MIT/ast-finetuned-speech-commands-v2"
-)
 text_analyzer = pipeline(
     "sentiment-analysis",
     model="nlptown/bert-base-multilingual-uncased-sentiment"
 )
-korean_sentiment = pipeline(
-    "text-classification",
-    model="searle-j/korean_sentiment_analysis"
-)
-# 유틸리티 함수들
 def map_acoustic_to_emotion(features):
     """음향학적 특성을 감정으로 매핑"""
-    intensity = features["energy"] * 100
-    if features["energy"] > 0.7:
-        if features["tempo"] > 120:
-            emotion = "기쁨/흥분"
         else:
-            emotion = "분노/강조"
-    elif features["pitch"] > 0.6:
-        emotion = "놀람/관심"
-    elif features["energy"] < 0.3:
-        emotion = "슬픔/우울"
     else:
-        emotion = "평온/중립"
-    return {
-        "emotion": emotion,
-        "intensity": intensity,
-        "features": features
-    }
-def generate_detailed_prompt(text, voice_emotion, text_sentiment, acoustic_features):
-    """프롬프트 생성"""
-    emotion_colors = {
-        "기쁨/흥분": "밝은 노랑과 주황색",
-        "분노/강조": "강렬한 빨강과 검정",
-        "놀람/관심": "선명한 파랑과 보라",
-        "슬픔/우울": "어두운 파랑과 회색",
-        "평온/중립": "부드러운 초록과 베이지"
-    }
-    visual_elements = {
-        "high_energy": "역동적인 붓질과 강한 대비",
-        "medium_energy": "균형잡힌 구도와 자연스러운 흐름",
-        "low_energy": "부드러운 그라데이션과 차분한 톤"
     }
-    energy_level = "medium_energy"
-    if acoustic_features["energy"] > 0.7:
-        energy_level = "high_energy"
-    elif acoustic_features["energy"] < 0.3:
-        energy_level = "low_energy"
-    prompt = f"한국 전통 민화 스타일의 추상화, {emotion_colors.get(voice_emotion['emotion'], '자연스러운 색상')} 기반. "
-    prompt += f"{visual_elements[energy_level]}를 통해 감정의 깊이를 표현. "
-    prompt += f"음성의 {voice_emotion['emotion']} 감정과 텍스트의 {text_sentiment['label']} 감정이 조화를 이루며, "
-    prompt += f"목소리의 특징(강도:{voice_emotion['intensity']:.1f})을 화면의 동적인 요소로 표현. "
-    prompt += f"발화 내용 '{text}'의 의미를 은유적 이미지로 담아내기."
-    return prompt
 def generate_image_from_prompt(prompt):
-    """이미지 생성"""
     print(f"Generating image with prompt: {prompt}")
     try:
         if not prompt:
             return None
         response = requests.post(
@@ -107,18 +110,48 @@ def generate_image_from_prompt(prompt):
         )
         if response.status_code == 200:
             return response.content
         else:
             print(f"Error: {response.status_code}")
             print(f"Response: {response.text}")
             return None
     except Exception as e:
         print(f"Error generating image: {str(e)}")
         return None
 def create_interface():
     with gr.Blocks(theme=gr.themes.Soft()) as app:
-        # 상태 관리
         state = gr.State({
             "user_name": "",
             "reflections": [],
@@ -130,86 +163,8 @@ def create_interface():
         header = gr.Markdown("# 디지털 굿판")
         user_display = gr.Markdown("")
-        with gr.Tabs() as tabs:
-            # 입장
-            with gr.Tab("입장"):
-                gr.Markdown("""# 디지털 굿판에 오신 것을 환영합니다""")
-                name_input = gr.Textbox(label="이름을 알려주세요")
-                start_btn = gr.Button("여정 시작하기")
-            # 청신
-            with gr.Tab("청신"):
-                with gr.Row():
-                    audio_path = os.path.abspath(os.path.join("assets", "main_music.mp3"))
-                    audio = gr.Audio(
-                        value=audio_path,
-                        type="filepath",
-                        label="온천천의 소리",
-                        interactive=False,
-                        autoplay=True
-                    )
-                    with gr.Column():
-                        reflection_input = gr.Textbox(
-                            label="현재 순간의 감상을 적어주세요",
-                            lines=3
-                        )
-                        save_btn = gr.Button("감상 저장하기")
-                        reflections_display = gr.Dataframe(
-                            headers=["시간", "감상", "감정 분석"],
-                            label="기록된 감상들"
-                        )
-            # 기원
-            with gr.Tab("기원"):
-                gr.Markdown("## 기원 - 목소리로 전하기")
-                with gr.Row():
-                    with gr.Column():
-                        voice_input = gr.Audio(
-                            label="나누고 싶은 이야기를 들려주세요",
-                            sources=["microphone"],
-                            type="filepath",
-                            interactive=True
-                        )
-                        clear_btn = gr.Button("녹음 지우기")
-                    with gr.Column():
-                        transcribed_text = gr.Textbox(
-                            label="인식된 텍스트",
-                            interactive=False
-                        )
-                        voice_emotion = gr.Textbox(
-                            label="음성 감정 분석",
-                            interactive=False
-                        )
-                        text_emotion = gr.Textbox(
-                            label="텍스트 감정 분석",
-                            interactive=False
-                        )
-                        analyze_btn = gr.Button("분석하기")
-            # 송신
-            with gr.Tab("송신"):
-                gr.Markdown("## 송신 - 시각화 결과")
-                with gr.Column():
-                    final_prompt = gr.Textbox(
-                        label="생성된 프롬프트",
-                        interactive=False,
-                        lines=3
-                    )
-                    generate_btn = gr.Button("이미지 생성하기")
-                    result_image = gr.Image(
-                        label="생성된 이미지",
-                        type="pil"
-                    )
-        # 인터페이스 함수들
-        def start_journey(name):
-            """여정 시작"""
-            return f"# 환영합니다, {name}님의 디지털 굿판", gr.update(selected="청신")
-        def clear_voice_input():
-            """음성 입력 초기화"""
-            return None
         def analyze_voice(audio_path, state):
             """음성 분석"""
@@ -219,6 +174,7 @@ def create_interface():
             try:
                 y, sr = librosa.load(audio_path, sr=16000)
                 acoustic_features = {
                     "energy": float(np.mean(librosa.feature.rms(y=y))),
                     "tempo": float(librosa.beat.tempo(y)[0]),
@@ -226,65 +182,39 @@ def create_interface():
                     "volume": float(np.mean(np.abs(y)))
                 }
-                voice_emotion = map_acoustic_to_emotion(acoustic_features)
                 transcription = speech_recognizer(y)
                 text = transcription["text"]
-                text_sentiment = korean_sentiment(text)[0]
-                voice_result = f"음성 감정: {voice_emotion['emotion']} (강도: {voice_emotion['intensity']:.2f})"
-                text_result = f"텍스트 감정: {text_sentiment['label']} ({text_sentiment['score']:.2f})"
-                prompt = generate_detailed_prompt(text, voice_emotion, text_sentiment, acoustic_features)
                 return state, text, voice_result, text_result, prompt
             except Exception as e:
                 return state, f"오류 발생: {str(e)}", "", "", ""
-        def save_reflection(text, state):
-            """감상 저장"""
-            if not text.strip():
-                return state, state["reflections"]
-            current_time = datetime.now().strftime("%H:%M:%S")
-            sentiment = text_analyzer(text)[0]
-            new_reflection = [current_time, text, f"{sentiment['label']} ({sentiment['score']:.2f})"]
-            if "reflections" not in state:
-                state["reflections"] = []
-            state["reflections"].append(new_reflection)
-            return state, state["reflections"]
-        # 이벤트 연결
-        start_btn.click(
-            fn=lambda name: (f"# 환영합니다, {name}님의 디지털 굿판", gr.update(selected="청신")),
-            inputs=[name_input],
-            outputs=[user_display, tabs]
-        )
-        save_btn.click(
-            fn=save_reflection,
-            inputs=[reflection_input, state],
-            outputs=[state, reflections_display]
-        )
-        clear_btn.click(
-            fn=clear_voice_input,
-            inputs=[],
-            outputs=[voice_input]
-        )
-        analyze_btn.click(
-            fn=analyze_voice,
-            inputs=[voice_input, state],
-            outputs=[state, transcribed_text, voice_emotion, text_emotion, final_prompt]
-        )
-        generate_btn.click(
-            fn=generate_image_from_prompt,
-            inputs=[final_prompt],
-            outputs=[result_image]
-        )
         return app

 import requests
 # 환경변수에서 토큰 가져오기
+HF_API_TOKEN = os.getenv("roots")
 if not HF_API_TOKEN:
     raise ValueError("roots token not found in environment variables")
     "automatic-speech-recognition",
     model="kresnik/wav2vec2-large-xlsr-korean"
 )
 text_analyzer = pipeline(
     "sentiment-analysis",
     model="nlptown/bert-base-multilingual-uncased-sentiment"
 )
 def map_acoustic_to_emotion(features):
     """Classify the emotion of a voice sample from its acoustic features.

     Args:
         features: Mapping with numeric "energy", "tempo", "pitch" and
             "volume" entries.

     Returns:
         A dict with the keys "primary" (emotion label), "intensity"
         (energy scaled by 100 and capped at 100), "confidence",
         "secondary" (always an empty string), "characteristics" (a
         one-item list describing the voice) and "details" (formatted
         energy level plus three-step ratings for speech rate, pitch
         variation and volume).
     """
     # Scale the raw features; each one is capped at its upper bound only.
     energy_pct = min(features["energy"] * 100, 100)
     tempo_ratio = min(features["tempo"] / 200, 1)
     pitch_ratio = min(features["pitch"] * 2, 1)

     def _classify():
         """Return (label, trait, confidence) for the scaled features."""
         # High energy wins first; tempo separates joy from anger.
         if energy_pct > 70:
             if tempo_ratio > 0.6:
                 return "기쁨/열정", "빠르고 활기찬 말하기 패턴", energy_pct / 100
             return "분노/강조", "강한 음성 강도", energy_pct / 100
         # Otherwise a high pitch decides; energy separates the two labels.
         if pitch_ratio > 0.6:
             if energy_pct > 50:
                 return "놀람/흥분", "높은 음고와 강한 강세", pitch_ratio
             return "관심/호기심", "음고 변화가 큼", pitch_ratio
         # Low energy: confidence grows as the energy approaches zero.
         if energy_pct < 30:
             weakness = (30 - energy_pct) / 30
             if tempo_ratio < 0.4:
                 return "슬픔/우울", "느리고 약한 음성", weakness
             return "피로/무기력", "낮은 에너지 레벨", weakness
         # Mid-range energy without a pitch signal: fixed confidence.
         if tempo_ratio > 0.5:
             return "평온/안정", "균형잡힌 말하기 패턴", 0.5
         return "차분/진지", "안정적인 음성 특성", 0.5

     def _grade(value, upper, lower, names):
         """Pick the high / middle / low name for a value and two cut-offs."""
         high, middle, low = names
         if value > upper:
             return high
         if value > lower:
             return middle
         return low

     label, trait, confidence = _classify()
     loudness = features["volume"]

     return {
         "primary": label,
         "intensity": energy_pct,
         "confidence": confidence,
         "secondary": "",
         "characteristics": [trait],
         "details": {
             "energy_level": f"{energy_pct:.1f}%",
             "speech_rate": _grade(tempo_ratio, 0.6, 0.4, ("빠름", "보통", "느림")),
             "pitch_variation": _grade(pitch_ratio, 0.6, 0.3, ("높음", "보통", "낮음")),
             "voice_volume": _grade(loudness, 0.7, 0.3, ("큼", "보통", "작음")),
         },
     }
 def generate_image_from_prompt(prompt):
+    """이미지 생성 함수"""
     print(f"Generating image with prompt: {prompt}")
     try:
         if not prompt:
+            print("No prompt provided")
             return None
         response = requests.post(
         )
         if response.status_code == 200:
+            print("Image generated successfully")
             return response.content
         else:
             print(f"Error: {response.status_code}")
             print(f"Response: {response.text}")
             return None
     except Exception as e:
         print(f"Error generating image: {str(e)}")
         return None
def _sentiment_strength(text_sentiment):
    """Return the 1-5 strength to report for a text-sentiment result.

    The star-rating sentiment model used by this app puts the rating in
    ``label`` (for example "4 stars"), while ``score`` is only the
    classifier's confidence in that label. The leading digit of the label
    is therefore used; when no star label is present the raw ``score`` is
    returned so that callers passing only a score keep working.
    """
    label = str(text_sentiment.get("label", "")).strip()
    leading = label.split(maxsplit=1)[0] if label else ""
    if leading in {"1", "2", "3", "4", "5"}:
        return int(leading)
    return text_sentiment["score"]


def generate_detailed_prompt(text, emotions, text_sentiment):
    """Build the image-generation prompt from voice and text sentiment.

    Args:
        text: Transcribed utterance, quoted verbatim in the prompt.
        emotions: Dict with "primary" (emotion label), "intensity"
            (compared against the 70 and 40 thresholds) and
            "characteristics" (list of strings joined with ", ").
        text_sentiment: Dict describing the text sentiment. A
            "<n> star(s)" value under "label" supplies the strength shown
            as "n/5"; without one, the value under "score" is shown.

    Returns:
        The prompt string (Korean).

    Raises:
        KeyError: If a required key is missing from ``emotions``, or if
            ``text_sentiment`` has neither a star label nor a "score".
    """
    emotion_colors = {
        "기쁨/열정": "밝은 노랑과 따뜻한 주황색",
        "분노/강조": "강렬한 빨강과 짙은 검정",
        "놀람/흥분": "선명한 파랑과 밝은 보라",
        "관심/호기심": "연한 하늘색과 민트색",
        "슬픔/우울": "어두운 파랑과 회색",
        "피로/무기력": "탁한 갈색과 짙은 회색",
        "평온/안정": "부드러운 초록과 베이지",
        "차분/진지": "차분한 남색과 깊은 보라",
    }

    # Visual treatment follows the voice intensity.
    if emotions["intensity"] > 70:
        visual_style = "역동적인 붓질과 강한 대비"
    elif emotions["intensity"] > 40:
        visual_style = "균형잡힌 구도와 중간 톤의 조화"
    else:
        visual_style = "부드러운 그라데이션과 차분한 톤"

    # Report the star rating rather than the classifier confidence: the
    # "/5" suffix only makes sense for a value on the 1-5 scale.
    strength = _sentiment_strength(text_sentiment)

    # Unknown emotion labels fall back to a neutral palette.
    palette = emotion_colors.get(emotions["primary"], "자연스러운 색상")
    traits = ", ".join(emotions["characteristics"])

    prompt = f"한국 전통 민화 스타일의 추상화, {palette} 기반. "
    prompt += f"{visual_style}로 표현된 {emotions['primary']}의 감정. "
    prompt += f"음성의 특징({traits})을 화면의 동적 요소로 표현. "
    prompt += f"발화 내용 '{text}'에서 느껴지는 감정(강도: {strength}/5)을 은유적 이미지로 담아내기."
    return prompt
 def create_interface():
     with gr.Blocks(theme=gr.themes.Soft()) as app:
         state = gr.State({
             "user_name": "",
             "reflections": [],
         header = gr.Markdown("# 디지털 굿판")
         user_display = gr.Markdown("")
+        # 나머지 인터페이스 코드는 동일하게 유지...
+        [이전 코드와 동일한 부분 생략]
         def analyze_voice(audio_path, state):
             """음성 분석"""
             try:
                 y, sr = librosa.load(audio_path, sr=16000)
+                # 음향학적 특성 분석
                 acoustic_features = {
                     "energy": float(np.mean(librosa.feature.rms(y=y))),
                     "tempo": float(librosa.beat.tempo(y)[0]),
                     "volume": float(np.mean(np.abs(y)))
                 }
+                # 감정 분석
+                emotions = map_acoustic_to_emotion(acoustic_features)
+                # 음성 인식
                 transcription = speech_recognizer(y)
                 text = transcription["text"]
+                # 텍스트 감정 분석
+                text_sentiment = text_analyzer(text)[0]
+                # 결과 포맷팅
+                voice_result = (
+                    f"음성 감정: {emotions['primary']} "
+                    f"(강도: {emotions['intensity']:.1f}%, 신뢰도: {emotions['confidence']:.2f})\n"
+                    f"특징: {', '.join(emotions['characteristics'])}\n"
+                    f"상세 분석:\n"
+                    f"- 에너지 레벨: {emotions['details']['energy_level']}\n"
+                    f"- 말하기 속도: {emotions['details']['speech_rate']}\n"
+                    f"- 음높이 변화: {emotions['details']['pitch_variation']}\n"
+                    f"- 음성 크기: {emotions['details']['voice_volume']}"
+                )
+                text_result = f"텍스트 감정 분석 (1-5): {text_sentiment['score']}"
+                # 프롬프트 생성
+                prompt = generate_detailed_prompt(text, emotions, text_sentiment)
                 return state, text, voice_result, text_result, prompt
             except Exception as e:
                 return state, f"오류 발생: {str(e)}", "", "", ""
+        # 이벤트 연결도 동일하게 유지...
+        [이전 코드와 동일한 부분 생략]
         return app