haepada committed
Commit 8b6ea6c · verified · 1 Parent(s): e006b08

Update app.py

Files changed (1)
  1. app.py +82 -46
app.py CHANGED
@@ -4,14 +4,11 @@ import librosa
 from transformers import pipeline
 from datetime import datetime
 import os
-from diffusers import StableDiffusionPipeline
-import torch
+import requests
 
-# Initialize Stable Diffusion
-model_id = "runwayml/stable-diffusion-v1-5"
-pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
-if torch.cuda.is_available():
-    pipe = pipe.to("cuda")
+# Inference API setup
+API_URL = "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-xl-base-1.0"
+headers = {"Authorization": "Bearer hf_..."}  # put your HuggingFace token here
 
 # Initialize the AI models
 speech_recognizer = pipeline(
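
Note that committing a literal "Bearer hf_..." token is unsafe, and the placeholder must be replaced before the Space can call the API at all. A minimal sketch of the same setup reading the token from the environment instead (HF_TOKEN is an assumed secret name, not part of this commit):

import os

API_URL = "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-xl-base-1.0"
# token comes from a Space secret / environment variable rather than source control
headers = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}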
@@ -33,9 +30,29 @@ def create_interface():
             "user_name": "",
             "reflections": [],
             "voice_analysis": None,
-            "final_prompt": "",
-            "generated_images": []
+            "final_prompt": ""
         })
+        def generate_image_from_prompt(prompt):
+            """Generate an image through the HuggingFace Inference API"""
+            try:
+                response = requests.post(API_URL, headers=headers, json={
+                    "inputs": prompt,
+                    "parameters": {
+                        "negative_prompt": "ugly, blurry, poor quality, distorted",
+                        "num_inference_steps": 30,
+                        "guidance_scale": 7.5
+                    }
+                })
+
+                if response.status_code == 200:
+                    return response.content  # raw binary image data
+                else:
+                    return None
+            except Exception as e:
+                print(f"Error generating image: {e}")
+                return None
+
+
 
         # Header
         header = gr.Markdown("# 디지털 굿판")
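
One wrinkle in the new function: it returns raw image bytes (response.content), while the 송신 tab's gr.Image is declared with type="pil" and therefore expects a PIL image from its event handler. A minimal decoding shim, assuming Pillow is installed (image_bytes_to_pil is illustrative, not part of the commit):

import io
from PIL import Image

def image_bytes_to_pil(content):
    """Decode raw PNG/JPEG bytes from the Inference API into a PIL image."""
    if content is None:
        return None
    return Image.open(io.BytesIO(content))

Wired up, the click handler could then use fn=lambda p: image_bytes_to_pil(generate_image_from_prompt(p)).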
@@ -52,7 +69,7 @@ def create_interface():
         with gr.Tab("청신"):
             with gr.Row():
                 # changed to an absolute path
-                audio_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "assets", "main_music.mp3"))
+                audio_path = os.path.abspath(os.path.join("assets", "main_music.mp3"))
                 audio = gr.Audio(
                     value=audio_path,
                     type="filepath",
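
The new form resolves assets/ against the process working directory, so it only finds the file when the app is launched from the repository root; the removed os.path.dirname(__file__) variant was anchored to the script's location. A sketch that keeps that anchor (assumes app.py sits next to the assets/ directory):

import os

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
audio_path = os.path.join(BASE_DIR, "assets", "main_music.mp3")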
@@ -76,7 +93,6 @@ def create_interface():
             gr.Markdown("## 기원 - 목소리로 전하기")
             with gr.Row():
                 with gr.Column():
-                    record_btn = gr.Button("🎤 녹음 시작/중지")
                     voice_input = gr.Audio(
                         label="나누고 싶은 이야기를 들려주세요",
                         sources=["microphone"],
@@ -100,21 +116,26 @@ def create_interface():
                     )
                     analyze_btn = gr.Button("분석하기")
 
-        # 송신
+        # 송신 tab (revised)
         with gr.Tab("송신"):
             gr.Markdown("## 송신 - 시각화 결과")
             with gr.Column():
                 final_prompt = gr.Textbox(
                     label="생성된 프롬프트",
-                    interactive=False
+                    interactive=False,
+                    lines=3
                 )
                 generate_btn = gr.Button("이미지 생성하기")
-                gallery = gr.Gallery(
-                    label="시각화 결과",
-                    columns=2,
-                    show_label=True,
-                    elem_id="gallery"
-                )
+                result_image = gr.Image(
+                    label="생성된 이미지",
+                    type="pil"
+                )
+        # register the click handler after the components it references exist
+        generate_btn.click(
+            fn=generate_image_from_prompt,
+            inputs=[final_prompt],
+            outputs=[result_image]
+        )
 
         def clear_voice_input():
             """Reset the voice input"""
@@ -136,29 +157,50 @@ def create_interface():
                 # emotion analysis
                 voice_emotions = emotion_classifier(y)
                 text_sentiment = text_analyzer(text)[0]
+
+                # build the image prompt
+                prompt = generate_prompt(text, voice_emotions[0], text_sentiment)
 
                 return (
                     state,
                     text,
                     f"음성 감정: {voice_emotions[0]['label']} ({voice_emotions[0]['score']:.2f})",
                     f"텍스트 감정: {text_sentiment['label']} ({text_sentiment['score']:.2f})",
-                    "분석이 완료되었습니다."
+                    prompt
                 )
             except Exception as e:
                 return state, f"오류 발생: {str(e)}", "", "", ""
 
-        def generate_image(prompt, state):
-            """Generate images"""
-            try:
-                images = pipe(prompt).images
-                image_paths = []
-                for i, image in enumerate(images):
-                    path = f"output_{i}.png"
-                    image.save(path)
-                    image_paths.append(path)
-                return image_paths
-            except Exception as e:
-                return []
+        def generate_prompt(text, voice_emotion, text_sentiment):
+            """Build the image-generation prompt"""
+            emotion_colors = {
+                "happy": "따뜻한 노란색과 주황색",
+                "sad": "깊은 파랑색과 보라색",
+                "angry": "강렬한 빨강색과 검정색",
+                "neutral": "부드러운 회색과 베이지색"
+            }
+
+            color = emotion_colors.get(voice_emotion['label'], "자연스러운 색상")
+            prompt = f"한국 전통 민화 스타일의 추상화, {color} 사용. "
+            prompt += f"음성의 감정({voice_emotion['label']})과 텍스트의 감정({text_sentiment['label']})이 조화를 이루며, "
+            prompt += f"음성의 특징을 반영한 동적인 구도. 발화 내용: '{text}'"
+
+            return prompt
+
+        def save_reflection(text, state):
+            """Save a reflection together with its sentiment"""
+            if not text.strip():
+                return state, state["reflections"]
+
+            current_time = datetime.now().strftime("%H:%M:%S")
+            sentiment = text_analyzer(text)[0]
+            new_reflection = [current_time, text, f"{sentiment['label']} ({sentiment['score']:.2f})"]
+
+            if "reflections" not in state:
+                state["reflections"] = []
+
+            state["reflections"].append(new_reflection)
+            return state, state["reflections"]
 
         # wire up the events
         start_btn.click(
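
For a concrete sense of what generate_prompt produces, a worked example with hypothetical classifier outputs (the actual labels depend on the emotion and sentiment models loaded above):

voice_emotion = {"label": "happy", "score": 0.91}      # hypothetical
text_sentiment = {"label": "positive", "score": 0.97}  # hypothetical
print(generate_prompt("오늘 참 감사한 하루였어요", voice_emotion, text_sentiment))
# -> "한국 전통 민화 스타일의 추상화, 따뜻한 노란색과 주황색 사용. ..."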
@@ -168,7 +210,7 @@ def create_interface():
         )
 
         save_btn.click(
-            fn=lambda text, state: save_reflection(text, state),
+            fn=save_reflection,
             inputs=[reflection_input, state],
             outputs=[state, reflections_display]
         )
@@ -185,12 +227,6 @@ def create_interface():
             outputs=[state, transcribed_text, voice_emotion, text_emotion, final_prompt]
         )
 
-        generate_btn.click(
-            fn=generate_image,
-            inputs=[final_prompt, state],
-            outputs=[gallery]
-        )
-
     return app
 
 if __name__ == "__main__":
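
One operational note on the new API path: the hosted Inference API answers 503 while a cold model is still loading, and generate_image_from_prompt maps every non-200 response to None. A hedged retry sketch around the same endpoint, reusing API_URL and headers from the file (wait_seconds is a guess; the API's error body reports an estimated_time that could be used instead):

import time
import requests

def post_with_retry(prompt, max_retries=3, wait_seconds=20):
    """Retry on 503: the hosted model may still be loading."""
    for _ in range(max_retries):
        response = requests.post(API_URL, headers=headers, json={"inputs": prompt})
        if response.status_code == 200:
            return response.content
        if response.status_code != 503:
            return None
        time.sleep(wait_seconds)  # model loading; wait and try again
    return None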