Spaces:

haepada
/

roots

Sleeping

App Files Files Community

haepada committed on Nov 4, 2024

Commit

9d47e7f

verified ·

1 Parent(s): db42dba

Update app.py

Browse files

Files changed (1) hide show

app.py +119 -176

app.py CHANGED Viewed

@@ -5,219 +5,162 @@ from transformers import pipeline
 from datetime import datetime
 import os
 # AI 모델 초기화
-speech_recognizer = pipeline("automatic-speech-recognition",
-                           model="kresnik/wav2vec2-large-xlsr-korean")
-emotion_classifier = pipeline("audio-classification",
-                            model="MIT/ast-finetuned-speech-commands-v2")
-text_analyzer = pipeline("sentiment-analysis",
-                        model="nlptown/bert-base-multilingual-uncased-sentiment")
 def create_interface():
     with gr.Blocks(theme=gr.themes.Soft()) as app:
-        # 상태 관리
         state = gr.State({
-            "stage": "intro",
-            "user_name": "",
             "reflections": [],
-            "voice_analyses": [],
-            "current_prompt": "",
-            "generated_images": []
         })
-        with gr.Column():
-            # 사용자 이름 표시
-            user_name_display = gr.Markdown("", elem_id="user-name-display")
-            # 단계별 탭
-            with gr.Tabs() as tabs:
-                # 1. 입장
-                with gr.Tab("입장", id="intro"):
-                    gr.Markdown("""
-                    # 디지털 굿판에 오신 것을 환영합니다
-                    온천천의 디지털 치유 공간으로 들어가보세요.
-                    """)
-                    name_input = gr.Textbox(label="이름을 알려주세요")
-                    start_button = gr.Button("여정 시작하기")
-                # 2. 청신
-                with gr.Tab("청신", id="cleansing"):
-                    gr.Markdown("## 청신 - 소리로 정화하기")
-                    with gr.Row():
-                        # 음악 플레이어
-                        audio_player = gr.Audio(
-                            value=os.path.join(os.path.dirname(__file__), "assets", "main_music.mp3"),
-                            type="filepath",
-                            label="온천천의 소리",
-                            interactive=True,
-                            autoplay=True
                         )
-                        # 감상 입력
-                        with gr.Column():
-                            reflection_input = gr.Textbox(
-                                label="현재 순간의 감상을 적어주세요",
-                                lines=3
-                            )
-                            save_reflection = gr.Button("감상 저장")
-                            reflections_display = gr.Dataframe(
-                                headers=["시간", "감상", "감정"],
-                                label="기록된 감상들"
-                            )
-                # 3. 기원
-                with gr.Tab("기원", id="prayer"):
-                    gr.Markdown("## 기원 - 목소리로 전하기")
-                    with gr.Row():
-                        # 음성 입력
-                        voice_input = gr.Audio(
-                            label="나누고 싶은 이야기를 들려주세요",
-                            sources=["microphone"],
-                            type="filepath",
-                            interactive=True
                         )
-                        # 분석 결과 표시
-                        with gr.Column():
-                            analysis_output = gr.JSON(
-                                label="음성 분석 결과"
-                            )
-                            text_output = gr.Textbox(
-                                label="인식된 텍스트",
-                                interactive=False
-                            )
-                            emotion_output = gr.Textbox(
-                                label="감정 분석",
-                                interactive=False
-                            )
-                # 4. 송신
-                with gr.Tab("송신", id="sharing"):
-                    gr.Markdown("## 송신 - 함께 나누기")
-                    with gr.Row():
-                        # 프롬프트 및 이미지 표시
-                        final_prompt = gr.Textbox(
-                            label="생성된 프롬프트",
-                            interactive=False
-                        )
-                        generated_gallery = gr.Gallery(
-                            label="시각화 결과",
-                            columns=2,
-                            height="auto"
-                        )
         # 함수 정의
-        def start_journey(name, state):
-            """여정 시작 함수"""
-            state["user_name"] = name
-            return (
-                state,
-                f"# 환영합니다, {name}님",
-                gr.update(selected="cleansing")
-            )
-        def analyze_voice(audio_path, state):
-            """종합적인 음성 분석 함수"""
             try:
-                if audio_path is None:
-                    return state, {"error": "음성 입력이 없습니다."}, "", ""
-                # 오디오 로드
-                y, sr = librosa.load(audio_path)
-                # 1. 음향학적 특성 분석
-                acoustic_features = {
                     "energy": float(np.mean(librosa.feature.rms(y=y))),
-                    "pitch_mean": float(np.mean(librosa.pitch_tuning(y))),
                     "tempo": float(librosa.beat.tempo(y)[0]),
-                    "mfcc": librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).mean(axis=1).tolist(),
                     "zero_crossing_rate": float(np.mean(librosa.feature.zero_crossing_rate(y)))
                 }
-                # 2. 음성 감정 분석
-                emotion_result = emotion_classifier(y)
-                # 3. 음성-텍스트 변환
-                text_result = speech_recognizer(y)
-                # 4. 텍스트 감정 분석
-                text_sentiment = text_analyzer(text_result["text"])[0]
-                # 결과 종합
-                analysis_result = {
-                    "acoustic_analysis": acoustic_features,
-                    "voice_emotion": emotion_result[0],
-                    "text": text_result["text"],
-                    "text_sentiment": text_sentiment
-                }
-                # 프롬프트 생성
-                prompt = generate_art_prompt(analysis_result)
-                state["current_prompt"] = prompt
                 return (
-                    state,
-                    analysis_result,
-                    text_result["text"],
-                    f"음성 감정: {emotion_result[0]['label']} ({emotion_result[0]['score']:.2f})\n"
-                    f"텍스트 감정: {text_sentiment['label']} ({text_sentiment['score']:.2f})"
                 )
             except Exception as e:
-                return state, {"error": str(e)}, "", ""
-        def save_reflection(text, state):
-            """감상 저장 함수"""
-            if not text.strip():
-                return state, state["reflections"]
-            current_time = datetime.now().strftime("%H:%M:%S")
-            sentiment = text_analyzer(text)[0]
-            new_reflection = [current_time, text, sentiment["label"]]
-            state["reflections"].append(new_reflection)
-            return state, state["reflections"]
-        def generate_art_prompt(analysis):
-            """분석 결과를 바탕으로 예술적 프롬프트 생성"""
-            emotion = analysis["voice_emotion"]["label"]
-            energy = analysis["acoustic_analysis"]["energy"]
-            text_sentiment = analysis["text_sentiment"]["label"]
-            colors = {
-                "happy": "따뜻한 노란색과 주황색",
-                "sad": "차분한 파랑색과 보라색",
-                "angry": "강렬한 빨강색과 검정색",
-                "neutral": "부드러운 회색과 베이지색"
-            }
-            prompt = f"한국 전통 민화 스타일로, {emotion}의 감정을 {colors.get(emotion, '자연스러운 색상')}으로 표현한 추상화. "
-            prompt += f"음성의 에너지({energy:.2f})를 채도로 표현하고, "
-            prompt += f"텍스트의 감정({text_sentiment})을 구도에 반영."
-            return prompt
         # 이벤트 연결
-        start_button.click(
             fn=start_journey,
-            inputs=[name_input, state],
-            outputs=[state, user_name_display, tabs]
         )
-        save_reflection.click(
             fn=save_reflection,
             inputs=[reflection_input, state],
             outputs=[state, reflections_display]
         )
         voice_input.change(
             fn=analyze_voice,
             inputs=[voice_input, state],
-            outputs=[state, analysis_output, text_output, emotion_output]
         )
     return app
 if __name__ == "__main__":
-    interface = create_interface()
-    interface.launch()

 from datetime import datetime
 import os
+# Module-level settings
+SAMPLE_RATE = 16000  # fixed sampling rate used when loading audio
+N_MELS = 64  # number of mel filters used for the MFCC computation
 # AI model initialisation
+text_analyzer = pipeline("sentiment-analysis", model="nlptown/bert-base-multilingual-uncased-sentiment")
 def create_interface():
     with gr.Blocks(theme=gr.themes.Soft()) as app:
+        # 상태 변수
         state = gr.State({
             "reflections": [],
+            "user_name": "",
+            "analyses": []
         })
+        # 헤더
+        header = gr.Markdown("# 디지털 굿판")
+        user_display = gr.Markdown("")
+        with gr.Tabs() as tabs:
+            # 입장 탭
+            with gr.Tab("입장") as intro_tab:
+                gr.Markdown("""
+                # 디지털 굿판에 오신 것을 환영합니다
+                온천천의 디지털 치유 공간으로 들어가보세요.
+                """)
+                name_input = gr.Textbox(label="이름을 알려주세요")
+                start_btn = gr.Button("여정 시작하기")
+            # 청신 탭
+            with gr.Tab("청신") as cleansing_tab:
+                with gr.Row():
+                    # 음악 플레이어
+                    audio = gr.Audio(
+                        value="assets/main_music.mp3",
+                        type="filepath",
+                        label="온천천의 소리",
+                        interactive=True
+                    )
+                    with gr.Column():
+                        reflection_input = gr.Textbox(
+                            label="현재 순간의 감상을 적어주세요",
+                            lines=3
                         )
+                        save_btn = gr.Button("감상 저장하기")
+                        reflections_display = gr.Dataframe(
+                            headers=["시간", "감상", "감정"],
+                            label="기록된 감상들"
                         )
+            # 기원 탭
+            with gr.Tab("기원") as prayer_tab:
+                with gr.Row():
+                    voice_input = gr.Audio(
+                        label="나누고 싶은 이야기를 들려주세요",
+                        sources=["microphone"],
+                        type="filepath"
+                    )
+                    with gr.Column():
+                        text_output = gr.Textbox(label="인식된 텍스트")
+                        emotion_output = gr.Textbox(label="감정 분석")
+                        audio_features = gr.JSON(label="음성 특성 분석")
+            # 송신 탭
+            with gr.Tab("송신") as sharing_tab:
+                prompt_display = gr.Textbox(label="생성된 프롬프트")
+                gallery = gr.Gallery(label="시각화 결과")
         # 함수 정의
+        def start_journey(name):
+            """여정 시작"""
+            if name.strip():
+                welcome_text = f"# 환영합니다, {name}님"
+                return welcome_text, gr.update(selected="청신")
+            return "이름을 입력해주세요", gr.update(selected="입장")
+        def save_reflection(text, state_data):
+            """감상 저장"""
+            if not text.strip():
+                return state_data, []
             try:
+                current_time = datetime.now().strftime("%H:%M:%S")
+                sentiment = text_analyzer(text)[0]
+                new_reflection = [current_time, text, sentiment["label"]]
+                if "reflections" not in state_data:
+                    state_data["reflections"] = []
+                state_data["reflections"].append(new_reflection)
+                return state_data, state_data["reflections"]
+            except Exception as e:
+                print(f"Error in save_reflection: {str(e)}")
+                return state_data, []
+        def analyze_voice(audio_path, state_data):
+            """음성 분석"""
+            if audio_path is None:
+                return None, None, None, state_data
+            try:
+                # 오디오 로드 및 리샘플링
+                y, sr = librosa.load(audio_path, sr=SAMPLE_RATE)
+                # 기본 특성 추출
+                features = {
                     "energy": float(np.mean(librosa.feature.rms(y=y))),
                     "tempo": float(librosa.beat.tempo(y)[0]),
                     "zero_crossing_rate": float(np.mean(librosa.feature.zero_crossing_rate(y)))
                 }
+                # MFCC 계산 (파라미터 조정)
+                mfccs = librosa.feature.mfcc(
+                    y=y,
+                    sr=sr,
+                    n_mfcc=13,
+                    n_mels=N_MELS
+                )
+                features["mfcc_mean"] = np.mean(mfccs, axis=1).tolist()
                 return (
+                    "음성이 성공적으로 분석되었습니다.",  # 텍스트 출력
+                    f"에너지: {features['energy']:.2f}\n템포: {features['tempo']:.2f}",  # 감정 출력
+                    features,  # JSON 출력
+                    state_data  # 상태 업데이트
                 )
             except Exception as e:
+                print(f"Error in analyze_voice: {str(e)}")
+                return f"오류 발생: {str(e)}", None, None, state_data
         # Event wiring
+        # Start button: start_journey writes the greeting and the tab update.
+        start_btn.click(
             fn=start_journey,
+            inputs=[name_input],
+            outputs=[user_display, tabs]
         )
+        # Save button: save_reflection updates the state and the table.
+        save_btn.click(
             fn=save_reflection,
             inputs=[reflection_input, state],
             outputs=[state, reflections_display]
         )
+        # A change of the recorded audio triggers analyze_voice.
         voice_input.change(
             fn=analyze_voice,
             inputs=[voice_input, state],
+            outputs=[text_output, emotion_output, audio_features, state]
         )
     return app
+# Build the interface and launch the app when run as a script
 if __name__ == "__main__":
+    demo = create_interface()
+    demo.launch()