haepada committed
Commit 4991658 · verified · 1 Parent(s): c018536

Update app.py

Files changed (1)
  1. app.py +51 -31
app.py CHANGED
@@ -210,38 +210,48 @@ def analyze_voice(audio_data, state):
         return state, "음성을 먼저 녹음해주세요.", "", "", ""
 
     try:
-        if isinstance(audio_data, tuple):
-            sr, y = audio_data
-        elif isinstance(audio_data, str):
-            y, sr = librosa.load(audio_data, sr=16000)
-        else:
-            print("Unsupported audio format")
-            return state, "오디오 형식을 지원하지 않습니다.", "", "", ""
+        # Process the audio data
+        sr, y = audio_data  # direct tuple unpacking
+
+        if len(y) == 0:
+            return state, "음성이 감지되지 않았습니다.", "", "", ""
 
         # Acoustic feature analysis
-        acoustic_features = calculate_baseline_features((sr, y))
-        if acoustic_features is None:
-            return state, "음성 분석에 실패했습니다.", "", "", ""
-
-        # Voice emotion analysis
-        voice_emotion = map_acoustic_to_emotion(acoustic_features, state.get("baseline_features"))
+        acoustic_features = {
+            "energy": float(np.mean(librosa.feature.rms(y=y))),
+            "tempo": float(librosa.beat.tempo(y=y, sr=sr)[0]),
+            "pitch": float(np.mean(librosa.feature.zero_crossing_rate(y=y))),
+            "volume": float(np.mean(np.abs(y)))
+        }
 
         # Speech recognition
         if speech_recognizer:
-            transcription = speech_recognizer({"sampling_rate": sr, "raw": y})
-            text = transcription["text"]
+            try:
+                transcription = speech_recognizer({"sampling_rate": sr, "raw": y.astype(np.float32)})
+                text = transcription["text"]
+            except Exception as e:
+                print(f"Speech recognition error: {e}")
+                text = "음성 인식 실패"
         else:
             text = "음성 인식 모델을 불러올 수 없습니다."
 
+        # Voice emotion analysis
+        voice_emotion = map_acoustic_to_emotion(acoustic_features, state.get("baseline_features"))
+
         # Text sentiment analysis
         if text_analyzer and text:
-            text_sentiment = text_analyzer(text)[0]
-            text_result = f"텍스트 감정 분석: {text_sentiment['label']} (점수: {text_sentiment['score']:.2f})"
+            try:
+                text_sentiment = text_analyzer(text)[0]
+                text_result = f"텍스트 감정 분석: {text_sentiment['label']} (점수: {text_sentiment['score']:.2f})"
+            except Exception as e:
+                print(f"Text analysis error: {e}")
+                text_sentiment = {"label": "unknown", "score": 0.0}
+                text_result = "텍스트 감정 분석 실패"
         else:
             text_sentiment = {"label": "unknown", "score": 0.0}
             text_result = "텍스트 감정 분석을 수행할 수 없습니다."
 
-        # Format the results
+        # Format the results
         voice_result = (
             f"음성 감정: {voice_emotion['primary']} "
             f"(강도: {voice_emotion['intensity']:.1f}%, 신뢰도: {voice_emotion['confidence']:.2f})\n"
@@ -255,11 +265,10 @@ def analyze_voice(audio_data, state):
 
         # Generate the prompt
         prompt = generate_detailed_prompt(text, voice_emotion, text_sentiment)
-
-        # Update the state
         state = {**state, "final_prompt": prompt}
 
         return state, text, voice_result, text_result, prompt
+
     except Exception as e:
         print(f"Error in analyze_voice: {str(e)}")
         return state, f"오류 발생: {str(e)}", "", "", ""
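
Note on the analyze_voice hunks above: the commit replaces the calculate_baseline_features call with inline librosa feature extraction, and feeds the ASR pipeline the transformers dict input format ({"sampling_rate": ..., "raw": ...}), which expects a float array, hence the added .astype(np.float32) cast. Below is a minimal standalone sketch of the same extraction, assuming only numpy and librosa as already imported in app.py; the test tone is made up, zero-crossing rate is only a coarse proxy for pitch, and newer librosa deprecates librosa.beat.tempo in favor of librosa.feature.rhythm.tempo:

    import numpy as np
    import librosa

    sr = 16000
    y = np.sin(2 * np.pi * 220 * np.linspace(0, 1.0, sr)).astype(np.float32)  # 1 s test tone

    features = {
        "energy": float(np.mean(librosa.feature.rms(y=y))),                 # mean RMS energy
        "tempo": float(librosa.beat.tempo(y=y, sr=sr)[0]),                   # estimated BPM
        "pitch": float(np.mean(librosa.feature.zero_crossing_rate(y=y))),    # coarse pitch proxy
        "volume": float(np.mean(np.abs(y))),                                 # mean absolute amplitude
    }
    print(features)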
@@ -322,16 +331,24 @@ def generate_image_from_prompt(prompt):
         print(f"Error generating image: {str(e)}")
         return None
 
-def save_reflection(text, state):
-    """감상 저장"""
+def save_reflection_fixed(text, state):
     if not text.strip():
         return state, []
-
+
     try:
         current_time = datetime.now().strftime("%H:%M:%S")
         if text_analyzer:
             sentiment = text_analyzer(text)[0]
             sentiment_text = f"{sentiment['label']} ({sentiment['score']:.2f})"
+
+            # Feed the sentiment result into the image prompt
+            emotion_prompt = generate_detailed_prompt(
+                text,
+                {"primary": sentiment['label'], "intensity": sentiment['score'] * 100,
+                 "characteristics": ["텍스트 기반 감정"], "confidence": sentiment['score']},
+                sentiment
+            )
+            state = {**state, "final_prompt": emotion_prompt}
         else:
             sentiment_text = "분석 불가"
 
@@ -341,8 +358,8 @@ def save_reflection(text, state):
         state = {**state, "reflections": reflections}
         return state, reflections
     except Exception as e:
-        print(f"Error in save_reflection: {str(e)}")
-        return state, []
+        print(f"Error saving reflection: {e}")
+        return state, state.get("reflections", [])
 
 def create_interface():
     db = SimpleDB()
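
Note on save_reflection_fixed above: when text_analyzer is available, the handler now synthesizes a voice_emotion-shaped dict from text sentiment alone so that generate_detailed_prompt can be reused, keeping state["final_prompt"] in sync even when the user types a reflection instead of speaking. A sketch of that stand-in structure; the label and score values are hypothetical, and the key set is inferred from the call sites in this diff:

    # Hypothetical output of text_analyzer(text)[0]
    sentiment = {"label": "positive", "score": 0.87}

    # Stand-in for the voice_emotion dict expected by generate_detailed_prompt
    pseudo_voice_emotion = {
        "primary": sentiment["label"],
        "intensity": sentiment["score"] * 100,     # rescale the 0..1 score to a percentage
        "characteristics": ["텍스트 기반 감정"],   # "text-based emotion"
        "confidence": sentiment["score"],
    }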
@@ -383,7 +400,8 @@ def create_interface():
                 baseline_audio = gr.Audio(
                     label="축원 문장 녹음하기",
                     sources=["microphone"],
-                    type="numpy"
+                    type="numpy",
+                    streaming=False
                 )
                 set_baseline_btn = gr.Button("기준점 설정 완료", variant="primary")
                 baseline_status = gr.Markdown("")
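
Note on the gr.Audio changes (this hunk and the voice_input hunk below): with type="numpy", Gradio delivers the finished recording to handlers as a (sample_rate, data) tuple, which is what lets analyze_voice unpack sr, y = audio_data directly, and streaming=False yields one complete clip rather than a stream of chunks. A minimal sketch of the pattern with hypothetical component names; browser recordings usually arrive as int16, so they should be cast and normalized before float-based analysis:

    import gradio as gr
    import numpy as np

    def handle_audio(audio):
        if audio is None:
            return "음성을 먼저 녹음해주세요."
        sr, y = audio                             # (sample_rate, np.ndarray) tuple
        if np.issubdtype(y.dtype, np.integer):    # e.g. int16 from the microphone
            y = y.astype(np.float32) / np.iinfo(y.dtype).max
        return f"{len(y) / sr:.2f}s of audio at {sr} Hz"

    with gr.Blocks() as demo:
        mic = gr.Audio(sources=["microphone"], type="numpy", streaming=False)
        out = gr.Markdown()
        mic.change(fn=handle_audio, inputs=mic, outputs=out)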
@@ -422,7 +440,8 @@ def create_interface():
                 voice_input = gr.Audio(
                     label="소원을 나누고 싶은 마음을 말해주세요",
                     sources=["microphone"],
-                    type="numpy"
+                    type="numpy",
+                    streaming=False
                 )
                 with gr.Row():
                     clear_btn = gr.Button("녹음 지우기", variant="secondary")
@@ -480,15 +499,16 @@ def create_interface():
                 return (
                     "이름을 입력해주세요",
                     gr.update(visible=False),
-                    current_state
+                    current_state,
+                    gr.update(selected=0)  # stay on the current tab
                 )
             current_state = {**current_state, "user_name": name}
             return (
                 WORLDVIEW_MESSAGE,
                 gr.update(visible=True),
-                current_state
+                current_state,
+                gr.update(selected=1)  # move to the blessing tab (the second tab)
             )
-
         def handle_baseline(audio, current_state):
             if audio is None:
                 return current_state, "음성을 먼저 녹음해주세요."
@@ -570,7 +590,7 @@ def create_interface():
         start_btn.click(
             fn=handle_start,
             inputs=[name_input, state],
-            outputs=[worldview_display, worldview_display, state]
+            outputs=[worldview_display, worldview_display, state, tabs]
         )
 
         set_baseline_btn.click(
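
Note on the tab navigation wiring: handle_start now returns a fourth value, a gr.update(selected=...) aimed at the tabs container that start_btn.click lists in its outputs. For selected= to resolve, the gr.Tabs block must be captured as a variable and its tabs given matching ids. A minimal sketch of the pattern, with tab labels guessed from the comments in this diff:

    import gradio as gr

    def go_to_blessing():
        # selected= must match the id given to the target gr.TabItem
        return gr.update(selected=1)

    with gr.Blocks() as demo:
        with gr.Tabs() as tabs:
            with gr.TabItem("이름 입력", id=0):
                start = gr.Button("시작하기")
            with gr.TabItem("축원", id=1):
                gr.Markdown("...")
        start.click(fn=go_to_blessing, inputs=None, outputs=tabs)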
 