Spaces:

haepada
/

roots

Sleeping

App Files Community

haepada committed on Nov 4, 2024

Commit

e006b08

verified ·

1 Parent(s): 8e216d6

Update app.py

Browse files

Files changed (1) hide show

app.py +79 -112

app.py CHANGED Viewed

@@ -4,11 +4,19 @@ import librosa
 from transformers import pipeline
 from datetime import datetime
 import os
 # AI 모델 초기화
 speech_recognizer = pipeline(
     "automatic-speech-recognition",
-    model="kresnik/wav2vec2-large-xlsr-korean"  # 한국어 음성인식 모델
 )
 emotion_classifier = pipeline(
     "audio-classification",
@@ -21,13 +29,12 @@ text_analyzer = pipeline(
 def create_interface():
     with gr.Blocks(theme=gr.themes.Soft()) as app:
-        # 상태 관리
         state = gr.State({
             "user_name": "",
             "reflections": [],
             "voice_analysis": None,
             "final_prompt": "",
-            "generated_images": []  # 생성된 이미지 저장
         })
         # 헤더
@@ -44,10 +51,14 @@ def create_interface():
             # 청신
             with gr.Tab("청신"):
                 with gr.Row():
                     audio = gr.Audio(
-                        value="assets/main_music.mp3",
                         type="filepath",
-                        label="온천천의 소리"
                     )
                     with gr.Column():
                         reflection_input = gr.Textbox(
@@ -64,14 +75,16 @@ def create_interface():
             with gr.Tab("기원"):
                 gr.Markdown("## 기원 - 목소리로 전하기")
                 with gr.Row():
-                    # 음성 입력
-                    voice_input = gr.Audio(
-                        label="나누고 싶은 이야기를 들려주세요",
-                        sources=["microphone"],
-                        type="filepath"
-                    )
-                    # 분석 결과
                     with gr.Column():
                         transcribed_text = gr.Textbox(
                             label="인식된 텍스트",
@@ -85,9 +98,7 @@ def create_interface():
                             label="텍스트 감정 분석",
                             interactive=False
                         )
-                        analysis_details = gr.JSON(
-                            label="상세 분석 결과"
-                        )
             # 송신
             with gr.Tab("송신"):
@@ -97,135 +108,91 @@ def create_interface():
                         label="생성된 프롬프트",
                         interactive=False
                     )
                     gallery = gr.Gallery(
                         label="시각화 결과",
-                        columns=2
                     )
-                    share_btn = gr.Button("결과 공유하기")
-        def analyze_voice_comprehensive(audio_path, state):
-            """종합적인 음성 분석"""
-            try:
-                if audio_path is None:
-                    return state, "음성 입력이 필요합니다.", "", "", {}
                 # 오디오 로드
                 y, sr = librosa.load(audio_path, sr=16000)
-                # 1. 음성-텍스트 변환
                 transcription = speech_recognizer(y)
-                spoken_text = transcription["text"]
-                # 2. 음향학적 특성 분석
-                features = {
-                    "energy": float(np.mean(librosa.feature.rms(y=y))),
-                    "pitch": float(np.mean(librosa.piptrack(y=y, sr=sr)[1])),
-                    "tempo": float(librosa.beat.tempo(y)[0]),
-                    "zero_crossing_rate": float(np.mean(librosa.feature.zero_crossing_rate(y)))
-                }
-                # 3. 음성 감정 분석
                 voice_emotions = emotion_classifier(y)
-                primary_emotion = voice_emotions[0]
-                # 4. 텍스트 감정 분석
-                text_sentiment = text_analyzer(spoken_text)[0]
-                # 결과 종합
-                analysis_result = {
-                    "acoustic_features": features,
-                    "voice_emotion": primary_emotion,
-                    "text_sentiment": text_sentiment
-                }
-                # 프롬프트 생성
-                prompt = generate_art_prompt(spoken_text, analysis_result, state["reflections"])
-                state["final_prompt"] = prompt
                 return (
                     state,
-                    spoken_text,
-                    f"음성 감정: {primary_emotion['label']} ({primary_emotion['score']:.2f})",
                     f"텍스트 감정: {text_sentiment['label']} ({text_sentiment['score']:.2f})",
-                    analysis_result
                 )
             except Exception as e:
-                return state, f"오류 발생: {str(e)}", "", "", {}
-        def generate_art_prompt(text, analysis, reflections):
-            """예술적 프롬프트 생성"""
-            # 음성 감정
-            voice_emotion = analysis["voice_emotion"]["label"]
-            # 텍스트 감정
-            text_sentiment = analysis["text_sentiment"]["label"]
-            # 에너지 레벨
-            energy = analysis["acoustic_features"]["energy"]
-            # 감정에 따른 색상 매핑
-            emotion_colors = {
-                "happy": "따뜻한 노란색과 주황색",
-                "sad": "깊은 파랑색과 보라색",
-                "angry": "강렬한 빨강색과 검정색",
-                "neutral": "부드러운 회색과 베이지색"
-            }
-            # 기본 프롬프트 구성
-            prompt = f"한국 전통 민화 스타일의 추상화, {emotion_colors.get(voice_emotion, '자연스러운 색상')} 사용. "
-            prompt += f"음성의 감정({voice_emotion})과 텍스트의 감정({text_sentiment})이 조화를 이루며, "
-            prompt += f"에너지 레벨({energy:.2f})을 통해 화면의 동적인 느낌을 표현. "
-            # 이전 감상들 반영
-            if reflections:
-                prompt += "이전 감상들의 정서를 배경에 은은하게 담아내기. "
-            return prompt
-        def save_reflection(text, state):
-            """감상 저장 및 감정 분석"""
-            if not text.strip():
-                return state, state["reflections"]
-            current_time = datetime.now().strftime("%H:%M:%S")
-            sentiment = text_analyzer(text)[0]
-            new_reflection = [current_time, text, f"{sentiment['label']} ({sentiment['score']:.2f})"]
-            state["reflections"].append(new_reflection)
-            return state, state["reflections"]
-        def start_journey(name):
-            """여정 시작"""
-            welcome_text = f"# 환영합니다, {name}님의 디지털 굿판"
-            return welcome_text, gr.update(selected="청신")
         # 이벤트 연결
         start_btn.click(
-            fn=start_journey,
             inputs=[name_input],
             outputs=[user_display, tabs]
         )
         save_btn.click(
-            fn=save_reflection,
             inputs=[reflection_input, state],
             outputs=[state, reflections_display]
         )
-        voice_input.change(
-            fn=analyze_voice_comprehensive,
             inputs=[voice_input, state],
-            outputs=[
-                state,
-                transcribed_text,
-                voice_emotion,
-                text_emotion,
-                analysis_details
-            ]
         )
     return app
-# 앱 실행
 if __name__ == "__main__":
     demo = create_interface()
     demo.launch()

 from transformers import pipeline
 from datetime import datetime
 import os
+from diffusers import StableDiffusionPipeline
+import torch
+# 스테이블 디퓨전 초기화
+model_id = "runwayml/stable-diffusion-v1-5"
+pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+if torch.cuda.is_available():
+    pipe = pipe.to("cuda")
 # AI 모델 초기화
 speech_recognizer = pipeline(
     "automatic-speech-recognition",
+    model="kresnik/wav2vec2-large-xlsr-korean"
 )
 emotion_classifier = pipeline(
     "audio-classification",
 def create_interface():
     with gr.Blocks(theme=gr.themes.Soft()) as app:
         state = gr.State({
             "user_name": "",
             "reflections": [],
             "voice_analysis": None,
             "final_prompt": "",
+            "generated_images": []
         })
         # 헤더
             # 청신
             with gr.Tab("청신"):
                 with gr.Row():
+                    # 절대 경로로 변경
+                    audio_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "assets", "main_music.mp3"))
                     audio = gr.Audio(
+                        value=audio_path,
                         type="filepath",
+                        label="온천천의 소리",
+                        interactive=False,
+                        autoplay=True
                     )
                     with gr.Column():
                         reflection_input = gr.Textbox(
             with gr.Tab("기원"):
                 gr.Markdown("## 기원 - 목소리로 전하기")
                 with gr.Row():
+                    with gr.Column():
+                        record_btn = gr.Button("🎤 녹음 시작/중지")
+                        voice_input = gr.Audio(
+                            label="나누고 싶은 이야기를 들려주세요",
+                            sources=["microphone"],
+                            type="filepath",
+                            interactive=True
+                        )
+                        clear_btn = gr.Button("녹음 지우기")
                     with gr.Column():
                         transcribed_text = gr.Textbox(
                             label="인식된 텍스트",
                             label="텍스트 감정 분석",
                             interactive=False
                         )
+                        analyze_btn = gr.Button("분석하기")
             # 송신
             with gr.Tab("송신"):
                         label="생성된 프롬프트",
                         interactive=False
                     )
+                    generate_btn = gr.Button("이미지 생성하기")
                     gallery = gr.Gallery(
                         label="시각화 결과",
+                        columns=2,
+                        show_label=True,
+                        elem_id="gallery"
                     )
+        def clear_voice_input():
+            """음성 입력 초기화"""
+            return None
+        def analyze_voice(audio_path, state):
+            """음성 분석"""
+            if audio_path is None:
+                return state, "음성을 먼저 녹음해주세요.", "", "", ""
+            try:
                 # 오디오 로드
                 y, sr = librosa.load(audio_path, sr=16000)
+                # 음성 인식
                 transcription = speech_recognizer(y)
+                text = transcription["text"]
+                # 감정 분석
                 voice_emotions = emotion_classifier(y)
+                text_sentiment = text_analyzer(text)[0]
                 return (
                     state,
+                    text,
+                    f"음성 감정: {voice_emotions[0]['label']} ({voice_emotions[0]['score']:.2f})",
                     f"텍스트 감정: {text_sentiment['label']} ({text_sentiment['score']:.2f})",
+                    "분석이 완료되었습니다."
                 )
             except Exception as e:
+                return state, f"오류 발생: {str(e)}", "", "", ""
+        def generate_image(prompt, state):
+            """이미지 생성"""
+            try:
+                images = pipe(prompt).images
+                image_paths = []
+                for i, image in enumerate(images):
+                    path = f"output_{i}.png"
+                    image.save(path)
+                    image_paths.append(path)
+                return image_paths
+            except Exception as e:
+                return []
         # 이벤트 연결
         start_btn.click(
+            fn=lambda name: (f"# 환영합니다, {name}님의 디지털 굿판", gr.update(selected="청신")),
             inputs=[name_input],
             outputs=[user_display, tabs]
         )
         save_btn.click(
+            fn=lambda text, state: save_reflection(text, state),
             inputs=[reflection_input, state],
             outputs=[state, reflections_display]
         )
+        clear_btn.click(
+            fn=clear_voice_input,
+            inputs=[],
+            outputs=[voice_input]
+        )
+        analyze_btn.click(
+            fn=analyze_voice,
             inputs=[voice_input, state],
+            outputs=[state, transcribed_text, voice_emotion, text_emotion, final_prompt]
+        )
+        generate_btn.click(
+            fn=generate_image,
+            inputs=[final_prompt, state],
+            outputs=[gallery]
         )
     return app
 if __name__ == "__main__":
     demo = create_interface()
     demo.launch()