Spaces:

haepada
/

roots

Running

App Files Community

haepada committed on Nov 4, 2024

Commit

8b6ea6c

verified ·

1 Parent(s): e006b08

Update app.py

Browse files

Files changed (1) hide show

app.py +82 -46

app.py CHANGED Viewed

@@ -4,14 +4,11 @@ import librosa
 from transformers import pipeline
 from datetime import datetime
 import os
-from diffusers import StableDiffusionPipeline
-import torch
-# 스테이블 디퓨전 초기화
-model_id = "runwayml/stable-diffusion-v1-5"
-pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
-if torch.cuda.is_available():
-    pipe = pipe.to("cuda")
 # AI 모델 초기화
 speech_recognizer = pipeline(
@@ -33,9 +30,29 @@ def create_interface():
             "user_name": "",
             "reflections": [],
             "voice_analysis": None,
-            "final_prompt": "",
-            "generated_images": []
         })
         # 헤더
         header = gr.Markdown("# 디지털 굿판")
@@ -52,7 +69,7 @@ def create_interface():
             with gr.Tab("청신"):
                 with gr.Row():
                     # 절대 경로로 변경
-                    audio_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "assets", "main_music.mp3"))
                     audio = gr.Audio(
                         value=audio_path,
                         type="filepath",
@@ -76,7 +93,6 @@ def create_interface():
                 gr.Markdown("## 기원 - 목소리로 전하기")
                 with gr.Row():
                     with gr.Column():
-                        record_btn = gr.Button("🎤 녹음 시작/중지")
                         voice_input = gr.Audio(
                             label="나누고 싶은 이야기를 들려주세요",
                             sources=["microphone"],
@@ -100,21 +116,26 @@ def create_interface():
                         )
                         analyze_btn = gr.Button("분석하기")
-            # 송신
-            with gr.Tab("송신"):
-                gr.Markdown("## 송신 - 시각화 결과")
-                with gr.Column():
-                    final_prompt = gr.Textbox(
-                        label="생성된 프롬프트",
-                        interactive=False
-                    )
-                    generate_btn = gr.Button("이미지 생성하기")
-                    gallery = gr.Gallery(
-                        label="시각화 결과",
-                        columns=2,
-                        show_label=True,
-                        elem_id="gallery"
-                    )
         def clear_voice_input():
             """음성 입력 초기화"""
@@ -136,29 +157,50 @@ def create_interface():
                 # 감정 분석
                 voice_emotions = emotion_classifier(y)
                 text_sentiment = text_analyzer(text)[0]
                 return (
                     state,
                     text,
                     f"음성 감정: {voice_emotions[0]['label']} ({voice_emotions[0]['score']:.2f})",
                     f"텍스트 감정: {text_sentiment['label']} ({text_sentiment['score']:.2f})",
-                    "분석이 완료되었습니다."
                 )
             except Exception as e:
                 return state, f"오류 발생: {str(e)}", "", "", ""
-        def generate_image(prompt, state):
-            """이미지 생성"""
-            try:
-                images = pipe(prompt).images
-                image_paths = []
-                for i, image in enumerate(images):
-                    path = f"output_{i}.png"
-                    image.save(path)
-                    image_paths.append(path)
-                return image_paths
-            except Exception as e:
-                return []
         # 이벤트 연결
         start_btn.click(
@@ -168,7 +210,7 @@ def create_interface():
         )
         save_btn.click(
-            fn=lambda text, state: save_reflection(text, state),
             inputs=[reflection_input, state],
             outputs=[state, reflections_display]
         )
@@ -185,12 +227,6 @@ def create_interface():
             outputs=[state, transcribed_text, voice_emotion, text_emotion, final_prompt]
         )
-        generate_btn.click(
-            fn=generate_image,
-            inputs=[final_prompt, state],
-            outputs=[gallery]
-        )
     return app
 if __name__ == "__main__":

 from transformers import pipeline
 from datetime import datetime
 import os
+import requests
+# Inference API 설정
+API_URL = "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-xl-base-1.0"
+headers = {"Authorization": "Bearer hf_..."}  # 여기에 HuggingFace 토큰 입력
 # AI 모델 초기화
 speech_recognizer = pipeline(
             "user_name": "",
             "reflections": [],
             "voice_analysis": None,
+            "final_prompt": ""
         })
+def generate_image_from_prompt(prompt):
+    """HuggingFace Inference API를 통한 이미지 생성"""
+    try:
+        response = requests.post(API_URL, headers=headers, json={
+            "inputs": prompt,
+            "parameters": {
+                "negative_prompt": "ugly, blurry, poor quality, distorted",
+                "num_inference_steps": 30,
+                "guidance_scale": 7.5
+            }
+        })
+        if response.status_code == 200:
+            return response.content  # 바이너리 이미지 데이터 반환
+        else:
+            return None
+    except Exception as e:
+        print(f"Error generating image: {e}")
+        return None
         # 헤더
         header = gr.Markdown("# 디지털 굿판")
             with gr.Tab("청신"):
                 with gr.Row():
                     # 절대 경로로 변경
+                    audio_path = os.path.abspath(os.path.join("assets", "main_music.mp3"))
                     audio = gr.Audio(
                         value=audio_path,
                         type="filepath",
                 gr.Markdown("## 기원 - 목소리로 전하기")
                 with gr.Row():
                     with gr.Column():
                         voice_input = gr.Audio(
                             label="나누고 싶은 이야기를 들려주세요",
                             sources=["microphone"],
                         )
                         analyze_btn = gr.Button("분석하기")
+   # 이벤트 핸들러 추가
+generate_btn.click(
+    fn=generate_image_from_prompt,
+    inputs=[final_prompt],
+    outputs=[result_image]
+)
+# 송신 탭 부분 수정
+with gr.Tab("송신"):
+    gr.Markdown("## 송신 - 시각화 결과")
+    with gr.Column():
+        final_prompt = gr.Textbox(
+            label="생성된 프롬프트",
+            interactive=False,
+            lines=3
+        )
+        generate_btn = gr.Button("이미지 생성하기")
+        result_image = gr.Image(
+            label="생성된 이미지",
+            type="pil"
+        )
         def clear_voice_input():
             """음성 입력 초기화"""
                 # 감정 분석
                 voice_emotions = emotion_classifier(y)
                 text_sentiment = text_analyzer(text)[0]
+                # 프롬프트 생성
+                prompt = generate_prompt(text, voice_emotions[0], text_sentiment)
                 return (
                     state,
                     text,
                     f"음성 감정: {voice_emotions[0]['label']} ({voice_emotions[0]['score']:.2f})",
                     f"텍스트 감정: {text_sentiment['label']} ({text_sentiment['score']:.2f})",
+                    prompt
                 )
             except Exception as e:
                 return state, f"오류 발생: {str(e)}", "", "", ""
+        def generate_prompt(text, voice_emotion, text_sentiment):
+            """프롬프트 생성"""
+            emotion_colors = {
+                "happy": "따뜻한 노란색과 주황색",
+                "sad": "깊은 파랑색과 보라색",
+                "angry": "강렬한 빨강색과 검정색",
+                "neutral": "부드러운 회색과 베이지색"
+            }
+            color = emotion_colors.get(voice_emotion['label'], "자연스러운 색상")
+            prompt = f"한국 전통 민화 스타일의 추상화, {color} 사용. "
+            prompt += f"음성의 감정({voice_emotion['label']})과 텍스트의 감정({text_sentiment['label']})이 조화를 이루며, "
+            prompt += f"음성의 특징을 반영한 동적인 구도. 발화 내용: '{text}'"
+            return prompt
+        def save_reflection(text, state):
+            """감상 저장"""
+            if not text.strip():
+                return state, state["reflections"]
+            current_time = datetime.now().strftime("%H:%M:%S")
+            sentiment = text_analyzer(text)[0]
+            new_reflection = [current_time, text, f"{sentiment['label']} ({sentiment['score']:.2f})"]
+            if "reflections" not in state:
+                state["reflections"] = []
+            state["reflections"].append(new_reflection)
+            return state, state["reflections"]
         # 이벤트 연결
         start_btn.click(
         )
         save_btn.click(
+            fn=save_reflection,
             inputs=[reflection_input, state],
             outputs=[state, reflections_display]
         )
             outputs=[state, transcribed_text, voice_emotion, text_emotion, final_prompt]
         )
     return app
 if __name__ == "__main__":