Spaces:

haepada
/

roots

Sleeping

App Files Community

haepada committed on Nov 5, 2024

Commit

a29bd41

verified ·

1 Parent(s): 45493cf

Update app.py

Browse files

Files changed (1) hide show

app.py +110 -52

app.py CHANGED Viewed

@@ -1,3 +1,5 @@
 import gradio as gr
 import numpy as np
 import librosa
@@ -25,14 +27,35 @@ text_analyzer = pipeline(
     model="nlptown/bert-base-multilingual-uncased-sentiment"
 )
-def map_acoustic_to_emotion(features):
-    """음향학적 특성을 감정으로 매핑"""
     # 음성 특성 정규화
-    energy_norm = min(features["energy"] * 100, 100)  # 에너지 레벨 (0-100)
-    tempo_norm = min(features["tempo"] / 200, 1)  # 템포 정규화 (0-1)
-    pitch_norm = min(features["pitch"] * 2, 1)  # 피치 정규화 (0-1)
-    # 상세 감정 분석
     emotions = {
         "primary": "",
         "intensity": energy_norm,
@@ -40,8 +63,8 @@ def map_acoustic_to_emotion(features):
         "secondary": "",
         "characteristics": []
     }
-    # 주요 감정 결정
     if energy_norm > 70:
         if tempo_norm > 0.6:
             emotions["primary"] = "기쁨/열정"
@@ -50,7 +73,6 @@ def map_acoustic_to_emotion(features):
             emotions["primary"] = "분노/강조"
             emotions["characteristics"].append("강한 음성 강도")
         emotions["confidence"] = energy_norm / 100
     elif pitch_norm > 0.6:
         if energy_norm > 50:
             emotions["primary"] = "놀람/흥분"
@@ -59,7 +81,6 @@ def map_acoustic_to_emotion(features):
             emotions["primary"] = "관심/호기심"
             emotions["characteristics"].append("음고 변화가 큼")
         emotions["confidence"] = pitch_norm
     elif energy_norm < 30:
         if tempo_norm < 0.4:
             emotions["primary"] = "슬픔/우울"
@@ -68,7 +89,6 @@ def map_acoustic_to_emotion(features):
             emotions["primary"] = "피로/무기력"
             emotions["characteristics"].append("낮은 에너지 레벨")
         emotions["confidence"] = (30 - energy_norm) / 30
     else:
         if tempo_norm > 0.5:
             emotions["primary"] = "평온/안정"
@@ -77,15 +97,14 @@ def map_acoustic_to_emotion(features):
             emotions["primary"] = "차분/진지"
             emotions["characteristics"].append("안정적인 음성 특성")
         emotions["confidence"] = 0.5
-    # 음성 특성 상세 분석
     emotions["details"] = {
         "energy_level": f"{energy_norm:.1f}%",
         "speech_rate": f"{'빠름' if tempo_norm > 0.6 else '보통' if tempo_norm > 0.4 else '느림'}",
         "pitch_variation": f"{'높음' if pitch_norm > 0.6 else '보통' if pitch_norm > 0.3 else '낮음'}",
         "voice_volume": f"{'큼' if features['volume'] > 0.7 else '보통' if features['volume'] > 0.3 else '작음'}"
     }
     return emotions
 def generate_image_from_prompt(prompt):
@@ -116,7 +135,6 @@ def generate_image_from_prompt(prompt):
             print(f"Error: {response.status_code}")
             print(f"Response: {response.text}")
             return None
     except Exception as e:
         print(f"Error generating image: {str(e)}")
         return None
@@ -133,8 +151,7 @@ def generate_detailed_prompt(text, emotions, text_sentiment):
         "평온/안정": "부드러운 초록과 베이지",
         "차분/진지": "차분한 남색과 깊은 보라"
     }
-    # 감정 강도에 따른 시각적 표현
     if emotions["intensity"] > 70:
         visual_style = "역동적인 붓질과 강한 대비"
     elif emotions["intensity"] > 40:
@@ -142,7 +159,6 @@ def generate_detailed_prompt(text, emotions, text_sentiment):
     else:
         visual_style = "부드러운 그라데이션과 차분한 톤"
-    # 프롬프트 구성
     prompt = f"한국 전통 민화 스타일의 추상화, {emotion_colors.get(emotions['primary'], '자연스러운 색상')} 기반. "
     prompt += f"{visual_style}로 표현된 {emotions['primary']}의 감정. "
     prompt += f"음성의 특징({', '.join(emotions['characteristics'])})을 화면의 동적 요소로 표현. "
@@ -154,11 +170,12 @@ def create_interface():
     with gr.Blocks(theme=gr.themes.Soft()) as app:
         state = gr.State({
             "user_name": "",
             "reflections": [],
             "voice_analysis": None,
             "final_prompt": ""
         })
-z
         # 헤더
         header = gr.Markdown("# 디지털 굿판")
         user_display = gr.Markdown("")
@@ -166,10 +183,22 @@ z
         with gr.Tabs() as tabs:
             # 입장
             with gr.Tab("입장"):
-                gr.Markdown("""# 디지털 굿판에 오신 것을 환영합니다""")
                 name_input = gr.Textbox(label="이름을 알려주세요")
                 start_btn = gr.Button("여정 시작하기")
             # 청신
             with gr.Tab("청신"):
                 with gr.Row():
@@ -192,25 +221,6 @@ z
                             label="기록된 감상들"
                         )
-        def save_reflection(text, state):
-            """감상 저장"""
-            if not text.strip():
-                return state, state["reflections"]
-            try:
-                current_time = datetime.now().strftime("%H:%M:%S")
-                sentiment = text_analyzer(text)[0]
-                new_reflection = [current_time, text, f"{sentiment['label']} ({sentiment['score']:.2f})"]
-                if "reflections" not in state:
-                    state["reflections"] = []
-                state["reflections"].append(new_reflection)
-                return state, state["reflections"]
-            except Exception as e:
-                print(f"Error in save_reflection: {str(e)}")
-                return state, []
             # 기원
             with gr.Tab("기원"):
                 gr.Markdown("## 기원 - 목소리로 전하기")
@@ -254,10 +264,52 @@ z
                         type="pil"
                     )
-        # 인터페이스 함수들
         def start_journey(name):
             """여정 시작"""
-            return f"# 환영합니다, {name}님의 디지털 굿판", gr.update(selected="청신")
         def clear_voice_input():
             """음성 입력 초기화"""
@@ -271,7 +323,6 @@ z
             try:
                 y, sr = librosa.load(audio_path, sr=16000)
-                # 음향학적 특성 분석
                 acoustic_features = {
                     "energy": float(np.mean(librosa.feature.rms(y=y))),
                     "tempo": float(librosa.beat.tempo(y)[0]),
@@ -279,17 +330,14 @@ z
                     "volume": float(np.mean(np.abs(y)))
                 }
-                # 감정 분석
-                emotions = map_acoustic_to_emotion(acoustic_features)
-                # 음성 인식
                 transcription = speech_recognizer(y)
                 text = transcription["text"]
-                # 텍스트 감정 분석
                 text_sentiment = text_analyzer(text)[0]
-                # 결과 포맷팅
                 voice_result = (
                     f"음성 감정: {emotions['primary']} "
                     f"(강도: {emotions['intensity']:.1f}%, 신뢰도: {emotions['confidence']:.2f})\n"
@@ -301,9 +349,11 @@ z
                     f"- 음성 크기: {emotions['details']['voice_volume']}"
                 )
-                text_result = f"텍스트 감정 분석 (1-5): {text_sentiment['score']}"
-                # 프롬프트 생성
                 prompt = generate_detailed_prompt(text, emotions, text_sentiment)
                 return state, text, voice_result, text_result, prompt
@@ -312,11 +362,17 @@ z
         # 이벤트 연결
         start_btn.click(
-            fn=lambda name: (f"# 환영합니다, {name}님의 디지털 굿판", gr.update(selected="청신")),
             inputs=[name_input],
             outputs=[user_display, tabs]
         )
         save_btn.click(
             fn=save_reflection,
             inputs=[reflection_input, state],
@@ -341,6 +397,8 @@ z
             outputs=[result_image]
         )
 if __name__ == "__main__":
     demo = create_interface()
     demo.launch(debug=True)

+# 1/2 시작 - app.py
 import gradio as gr
 import numpy as np
 import librosa
     model="nlptown/bert-base-multilingual-uncased-sentiment"
 )
+def calculate_baseline_features(audio_path):
+    """기준점 음성 특성 분석"""
+    try:
+        y, sr = librosa.load(audio_path, sr=16000)
+        features = {
+            "energy": float(np.mean(librosa.feature.rms(y=y))),
+            "tempo": float(librosa.beat.tempo(y)[0]),
+            "pitch": float(np.mean(librosa.feature.zero_crossing_rate(y))),
+            "volume": float(np.mean(np.abs(y))),
+            "mfcc": librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).mean(axis=1).tolist()
+        }
+        return features
+    except Exception as e:
+        print(f"Error calculating baseline: {str(e)}")
+        return None
+def map_acoustic_to_emotion(features, baseline_features=None):
+    """음향학적 특성을 감정으로 매핑 (기준점 대비)"""
     # 음성 특성 정규화
+    energy_norm = min(features["energy"] * 100, 100)
+    tempo_norm = min(features["tempo"] / 200, 1)
+    pitch_norm = min(features["pitch"] * 2, 1)
+    # 기준점이 있는 경우 상대적 변화 계산
+    if baseline_features:
+        energy_norm = (features["energy"] / baseline_features["energy"]) * 50
+        tempo_norm = (features["tempo"] / baseline_features["tempo"])
+        pitch_norm = (features["pitch"] / baseline_features["pitch"])
     emotions = {
         "primary": "",
         "intensity": energy_norm,
         "secondary": "",
         "characteristics": []
     }
+    # 감정 매핑 로직
     if energy_norm > 70:
         if tempo_norm > 0.6:
             emotions["primary"] = "기쁨/열정"
             emotions["primary"] = "분노/강조"
             emotions["characteristics"].append("강한 음성 강도")
         emotions["confidence"] = energy_norm / 100
     elif pitch_norm > 0.6:
         if energy_norm > 50:
             emotions["primary"] = "놀람/흥분"
             emotions["primary"] = "관심/호기심"
             emotions["characteristics"].append("음고 변화가 큼")
         emotions["confidence"] = pitch_norm
     elif energy_norm < 30:
         if tempo_norm < 0.4:
             emotions["primary"] = "슬픔/우울"
             emotions["primary"] = "피로/무기력"
             emotions["characteristics"].append("낮은 에너지 레벨")
         emotions["confidence"] = (30 - energy_norm) / 30
     else:
         if tempo_norm > 0.5:
             emotions["primary"] = "평온/안정"
             emotions["primary"] = "차분/진지"
             emotions["characteristics"].append("안정적인 음성 특성")
         emotions["confidence"] = 0.5
     emotions["details"] = {
         "energy_level": f"{energy_norm:.1f}%",
         "speech_rate": f"{'빠름' if tempo_norm > 0.6 else '보통' if tempo_norm > 0.4 else '느림'}",
         "pitch_variation": f"{'높음' if pitch_norm > 0.6 else '보통' if pitch_norm > 0.3 else '낮음'}",
         "voice_volume": f"{'큼' if features['volume'] > 0.7 else '보통' if features['volume'] > 0.3 else '작음'}"
     }
     return emotions
 def generate_image_from_prompt(prompt):
             print(f"Error: {response.status_code}")
             print(f"Response: {response.text}")
             return None
     except Exception as e:
         print(f"Error generating image: {str(e)}")
         return None
         "평온/안정": "부드러운 초록과 베이지",
         "차분/진지": "차분한 남색과 깊은 보라"
     }
     if emotions["intensity"] > 70:
         visual_style = "역동적인 붓질과 강한 대비"
     elif emotions["intensity"] > 40:
     else:
         visual_style = "부드러운 그라데이션과 차분한 톤"
     prompt = f"한국 전통 민화 스타일의 추상화, {emotion_colors.get(emotions['primary'], '자연스러운 색상')} 기반. "
     prompt += f"{visual_style}로 표현된 {emotions['primary']}의 감정. "
     prompt += f"음성의 특징({', '.join(emotions['characteristics'])})을 화면의 동적 요소로 표현. "
     with gr.Blocks(theme=gr.themes.Soft()) as app:
         state = gr.State({
             "user_name": "",
+            "baseline_features": None,  # 개인화된 기준점 저장
             "reflections": [],
             "voice_analysis": None,
             "final_prompt": ""
         })
         # 헤더
         header = gr.Markdown("# 디지털 굿판")
         user_display = gr.Markdown("")
         with gr.Tabs() as tabs:
             # 입장
             with gr.Tab("입장"):
+                gr.Markdown("### 디지털 굿판에 오신 것을 환영합니다")
                 name_input = gr.Textbox(label="이름을 알려주세요")
                 start_btn = gr.Button("여정 시작하기")
+            # 기준 설정
+            with gr.Tab("기준 설정"):
+                gr.Markdown("### 축원의 문장을 평온한 마음으로 읽어주세요")
+                gr.Markdown("'당신의 건강과 행복이 늘 가득하기를'")
+                baseline_audio = gr.Audio(
+                    label="축원 문장 녹음하기",
+                    sources=["microphone"],
+                    type="filepath"
+                )
+                set_baseline_btn = gr.Button("기준점 설정 완료")
+                baseline_status = gr.Markdown("")
             # 청신
             with gr.Tab("청신"):
                 with gr.Row():
                             label="기록된 감상들"
                         )
             # 기원
             with gr.Tab("기원"):
                 gr.Markdown("## 기원 - 목소리로 전하기")
                         type="pil"
                     )
         def start_journey(name):
             """여정 시작"""
+            welcome_text = f"""
+            # 환영합니다, {name}님의 디지털 굿판
+            ## 굿판의 세계관 🌌
+            디지털 굿판은 현대 도시 속에서 잊혀진 전통 굿의 정수를 담아낸 **디지털 의례의 공간**입니다.
+            이곳에서는 사람들의 목소리와 감정을 통해 **영적 교감**을 나누고, **자연과 도시의 에너지**를 연결하며,
+            평온함과 치유를 경험하게 됩니다.
+            ## 여정을 시작하며 🚀
+            먼저, 평온한 마음으로 축원의 문장을 읽어주세요.
+            이는 당신의 감정을 더 정확하게 이해하기 위한 기준점이 될 것입니다.
+            """
+            return welcome_text, gr.update(selected="기준 설정")
+        def set_baseline(audio_path, state):
+            """기준점 설정"""
+            if audio_path is None:
+                return state, "먼저 축원 문장을 녹음해주세요."
+            baseline_features = calculate_baseline_features(audio_path)
+            state = state.copy()
+            state["baseline_features"] = baseline_features
+            return state, "기준점이 설정되었습니다. 이제 청신 단계로 이동하실 수 있습니다.", gr.update(selected="청신")
+        def save_reflection(text, state):
+            """감상 저장"""
+            if not text.strip():
+                return state, state["reflections"]
+            try:
+                current_time = datetime.now().strftime("%H:%M:%S")
+                sentiment = text_analyzer(text)[0]
+                new_reflection = [current_time, text, f"{sentiment['label']} ({sentiment['score']:.2f})"]
+                if "reflections" not in state:
+                    state["reflections"] = []
+                state["reflections"].append(new_reflection)
+                return state, state["reflections"]
+            except Exception as e:
+                print(f"Error in save_reflection: {str(e)}")
+                return state, []
         def clear_voice_input():
             """음성 입력 초기화"""
             try:
                 y, sr = librosa.load(audio_path, sr=16000)
                 acoustic_features = {
                     "energy": float(np.mean(librosa.feature.rms(y=y))),
                     "tempo": float(librosa.beat.tempo(y)[0]),
                     "volume": float(np.mean(np.abs(y)))
                 }
+                # 기준점이 있는 경우 상대적 분석
+                baseline = state.get("baseline_features")
+                emotions = map_acoustic_to_emotion(acoustic_features, baseline)
                 transcription = speech_recognizer(y)
                 text = transcription["text"]
                 text_sentiment = text_analyzer(text)[0]
                 voice_result = (
                     f"음성 감정: {emotions['primary']} "
                     f"(강도: {emotions['intensity']:.1f}%, 신뢰도: {emotions['confidence']:.2f})\n"
                     f"- 음성 크기: {emotions['details']['voice_volume']}"
                 )
+                if baseline:
+                    voice_result += "\n\n[기준점 대비 분석]\n"
+                    voice_result += f"기준 상태와 비교한 감정 강도 변화: {emotions['intensity']-50:.1f}%"
+                text_result = f"텍스트 감정 분석 (1-5): {text_sentiment['score']}"
                 prompt = generate_detailed_prompt(text, emotions, text_sentiment)
                 return state, text, voice_result, text_result, prompt
         # 이벤트 연결
         start_btn.click(
+            fn=start_journey,
             inputs=[name_input],
             outputs=[user_display, tabs]
         )
+        set_baseline_btn.click(
+            fn=set_baseline,
+            inputs=[baseline_audio, state],
+            outputs=[state, baseline_status, tabs]
+        )
         save_btn.click(
             fn=save_reflection,
             inputs=[reflection_input, state],
             outputs=[result_image]
         )
+        return app
 if __name__ == "__main__":
     demo = create_interface()
     demo.launch(debug=True)