Spaces:

haepada
/

roots

Sleeping

App Files Community

haepada committed on Nov 5, 2024

Commit

5db3ce1

verified ·

1 Parent(s): e8d592d

Update app.py

Browse files

Files changed (1) hide show

app.py +105 -37

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import gradio as gr
 import numpy as np
 import librosa
 from transformers import pipeline
 from datetime import datetime
 import os
 import requests
@@ -23,6 +24,10 @@ text_analyzer = pipeline(
     "sentiment-analysis",
     model="nlptown/bert-base-multilingual-uncased-sentiment"
 )
 def generate_image_from_prompt(prompt):
     """HuggingFace Inference API를 통한 이미지 생성"""
@@ -134,43 +139,106 @@ def create_interface():
             return None
         def analyze_voice(audio_path, state):
-            """음성 분석"""
-            if audio_path is None:
-                return state, "음성을 먼저 녹음해주세요.", "", "", ""
-            try:
-                y, sr = librosa.load(audio_path, sr=16000)
-                transcription = speech_recognizer(y)
-                text = transcription["text"]
-                voice_emotions = emotion_classifier(y)
-                text_sentiment = text_analyzer(text)[0]
-                prompt = generate_prompt(text, voice_emotions[0], text_sentiment)
-                return (
-                    state,
-                    text,
-                    f"음성 감정: {voice_emotions[0]['label']} ({voice_emotions[0]['score']:.2f})",
-                    f"텍스트 감정: {text_sentiment['label']} ({text_sentiment['score']:.2f})",
-                    prompt
-                )
-            except Exception as e:
-                return state, f"오류 발생: {str(e)}", "", "", ""
-        def generate_prompt(text, voice_emotion, text_sentiment):
-            """프롬프트 생성"""
-            emotion_colors = {
-                "happy": "따뜻한 노란색과 주황색",
-                "sad": "깊은 파랑색과 보라색",
-                "angry": "강렬한 빨강색과 검정색",
-                "neutral": "부드러운 회색과 베이지색"
-            }
-            color = emotion_colors.get(voice_emotion['label'], "자연스러운 색상")
-            prompt = f"한국 전통 민화 스타일의 추상화, {color} 사용. "
-            prompt += f"음성의 감정({voice_emotion['label']})과 텍스트의 감정({text_sentiment['label']})이 조화를 이루며, "
-            prompt += f"음성의 특징을 반영한 동적인 구도. 발화 내용: '{text}'"
-            return prompt
         def save_reflection(text, state):
             """감상 저장"""

 import numpy as np
 import librosa
 from transformers import pipeline
+import torch
 from datetime import datetime
 import os
 import requests
     "sentiment-analysis",
     model="nlptown/bert-base-multilingual-uncased-sentiment"
 )
+korean_sentiment = pipeline(
+    "text-classification",
+    model="searle-j/korean_sentiment_analysis"  # Korean sentiment-analysis model
+)
 def generate_image_from_prompt(prompt):
     """HuggingFace Inference API를 통한 이미지 생성"""
             return None
         def analyze_voice(audio_path, state):
             """Analyze a recorded voice clip and build an image prompt.

             Args:
                 audio_path: Path of the recorded audio file, or None when
                     nothing has been recorded yet.
                 state: Opaque session state; returned unchanged as the
                     first element of the result.

             Returns:
                 A 5-tuple ``(state, text, voice_result, text_result, prompt)``.
                 When ``audio_path`` is None or any step raises, the second
                 element carries a user-facing message and the remaining
                 three are empty strings.
             """
             if audio_path is None:
                 return state, "음성을 먼저 녹음해주세요.", "", "", ""

             try:
                 # Load (and resample) the clip at 16 kHz.
                 y, sr = librosa.load(audio_path, sr=16000)

                 # 1. Acoustic measurements taken directly from the waveform.
                 acoustic_features = {
                     "energy": float(np.mean(librosa.feature.rms(y=y))),
                     # Keyword arguments: recent librosa releases reject a
                     # positional signal, and the default sr (22050) would
                     # not match the rate the clip was loaded at.
                     "tempo": float(librosa.beat.tempo(y=y, sr=sr)[0]),
                     # The mean zero-crossing rate is what this code stores
                     # under the "pitch" key.
                     "pitch": float(np.mean(librosa.feature.zero_crossing_rate(y))),
                     "volume": float(np.mean(np.abs(y))),
                 }
                 voice_emotion = map_acoustic_to_emotion(acoustic_features)

                 # 2. Speech-to-text. speech_recognizer is defined outside
                 # this block; it is expected to return a dict with "text".
                 transcription = speech_recognizer(y)
                 text = transcription["text"]

                 # 3. Sentiment of the transcript (first result only).
                 text_sentiment = korean_sentiment(text)[0]

                 # Human-readable summaries shown next to the transcript.
                 voice_result = f"음성 감정: {voice_emotion['emotion']} (강도: {voice_emotion['intensity']:.2f})"
                 text_result = f"텍스트 감정: {text_sentiment['label']} ({text_sentiment['score']:.2f})"

                 prompt = generate_detailed_prompt(
                     text, voice_emotion, text_sentiment, acoustic_features
                 )
                 return state, text, voice_result, text_result, prompt
             except Exception as e:
                 # UI boundary: report the failure in the text slot instead
                 # of letting the exception propagate to the caller.
                 return state, f"오류 발생: {str(e)}", "", "", ""
def map_acoustic_to_emotion(
    features,
    *,
    high_energy=0.7,
    low_energy=0.3,
    fast_tempo=120,
    high_pitch=0.6,
):
    """Map acoustic measurements to a coarse emotion label.

    Args:
        features: Mapping with numeric "energy", "tempo" and "pitch"
            entries. Any other keys are carried along untouched.
        high_energy: Energy strictly above this counts as a loud voice.
        low_energy: Energy strictly below this counts as a quiet voice.
        fast_tempo: For a loud voice, tempo strictly above this selects
            "기쁨/흥분"; otherwise "분노/강조".
        high_pitch: For a non-loud voice, "pitch" strictly above this
            selects "놀람/관심" (checked before the quiet-voice rule).

    Returns:
        A dict with "emotion" (label string), "intensity" (energy * 100)
        and "features" (the same mapping that was passed in, not a copy).

    Raises:
        KeyError: If a key needed by the branch taken is missing.
    """
    # NOTE(review): the visible caller passes the mean RMS of a waveform as
    # "energy"; confirm the default thresholds suit that scale.
    energy = features["energy"]

    if energy > high_energy:
        emotion = "기쁨/흥분" if features["tempo"] > fast_tempo else "분노/강조"
    elif features["pitch"] > high_pitch:
        emotion = "놀람/관심"
    elif energy < low_energy:
        emotion = "슬픔/우울"
    else:
        emotion = "평온/중립"

    return {
        "emotion": emotion,
        "intensity": energy * 100,
        "features": features,
    }
def generate_detailed_prompt(text, voice_emotion, text_sentiment, acoustic_features):
    """Build the Korean image-generation prompt from the analysis results.

    Args:
        text: Transcribed utterance, quoted verbatim in the prompt.
        voice_emotion: Mapping with "emotion" (label) and "intensity"
            (number) entries.
        text_sentiment: Mapping with a "label" entry.
        acoustic_features: Mapping with a numeric "energy" entry.

    Returns:
        The prompt as a single string of five sentences.

    Raises:
        KeyError: If one of the entries listed above is missing.
    """
    # Colour palette per voice-emotion label; unknown labels fall back to
    # a generic palette below.
    emotion_colors = {
        "기쁨/흥분": "밝은 노랑과 주황색",
        "분노/강조": "강렬한 빨강과 검정",
        "놀람/관심": "선명한 파랑과 보라",
        "슬픔/우울": "어두운 파랑과 회색",
        "평온/중립": "부드러운 초록과 베이지",
    }
    # Each phrase carries its own object particle: "대비" takes "를" while
    # "흐름" and "톤" end in a consonant and take "을". Appending "를"
    # unconditionally produced ungrammatical "흐름를" / "톤를".
    visual_elements = {
        "high_energy": "역동적인 붓질과 강한 대비를",
        "medium_energy": "균형잡힌 구도와 자연스러운 흐름을",
        "low_energy": "부드러운 그라데이션과 차분한 톤을",
    }

    # Bucket the energy value; both comparisons are strict.
    energy = acoustic_features["energy"]
    if energy > 0.7:
        energy_level = "high_energy"
    elif energy < 0.3:
        energy_level = "low_energy"
    else:
        energy_level = "medium_energy"

    emotion = voice_emotion["emotion"]
    palette = emotion_colors.get(emotion, "자연스러운 색상")

    sentences = (
        f"한국 전통 민화 스타일의 추상화, {palette} 기반. ",
        f"{visual_elements[energy_level]} 통해 감정의 깊이를 표현. ",
        f"음성의 {emotion} 감정과 텍스트의 {text_sentiment['label']} 감정이 조화를 이루며, ",
        f"목소리의 특징(강도:{voice_emotion['intensity']:.1f})을 화면의 동적인 요소로 표현. ",
        f"발화 내용 '{text}'의 의미를 은유적 이미지로 담아내기.",
    )
    return "".join(sentences)
         def save_reflection(text, state):
             """감상 저장"""