haepada committed · Commit 00ada4a · verified · 1 Parent(s): 9f7512d

Update app.py

Files changed (1):
  1. app.py +179 -127
app.py CHANGED
@@ -3,150 +3,202 @@ import numpy as np
  import librosa
  from transformers import pipeline
  import json

- # Initialize AI models
- emotion_analyzer = pipeline("audio-classification", model="MIT/ast-finetuned-speech-commands-v2")
  speech_recognizer = pipeline("automatic-speech-recognition",
      model="kresnik/wav2vec2-large-xlsr-korean")

- # Global state management
- current_stage = "intro"
- session_data = {}
 
- def create_interface():
-     with gr.Blocks(theme=gr.themes.Soft()) as app:
-         # State management
-         state = gr.State(value={"stage": "intro", "session_data": {}})
-
-         # Header
-         gr.Markdown("# 디지털 굿판")
-
-         # Navigation tabs
-         with gr.Tabs() as tabs:
-             # Intro/세계관 Stage
-             with gr.Tab("입장", id="intro"):
-                 gr.Markdown("""
-                 # 디지털 굿판에 오신 것을 환영합니다
-                 온천천의 디지털 치유 공간으로 들어가보세요.
-                 """)
-                 intro_next = gr.Button("여정 시작하기")

-             # 청신 Stage (Sound Purification)
-             with gr.Tab("청신", id="cleansing", visible=False):
-                 with gr.Row():
-                     audio_player = gr.Audio(
-                         value="path_to_default_sound.mp3",  # default sound file
-                         type="filepath",
-                         label="온천천의 소리"
-                     )
-                     location_info = gr.Textbox(
-                         label="현재 위치",
-                         value="온천장역",
-                         interactive=False
-                     )
-                 cleansing_next = gr.Button("다음 단계로")

-             # 기원 Stage (Voice Analysis)
-             with gr.Tab("기원", id="voice", visible=False):
-                 with gr.Row():
-                     # Voice input component
-                     voice_input = gr.Audio(
-                         label="목소리로 전하기",
-                         sources=["microphone", "upload"],
-                         type="filepath"
-                     )
-
-                     # Analysis results
-                     with gr.Column():
-                         emotion_output = gr.JSON(
-                             label="감정 분석 결과",
-                             visible=True
                          )
-                         text_output = gr.Textbox(
-                             label="음성 텍스트",
-                             visible=True
                          )
-                 voice_next = gr.Button("다음 단계로")

-             # 송신 Stage (Sharing)
-             with gr.Tab("송신", id="sharing", visible=False):
-                 with gr.Row():
-                     gr.Gallery(
-                         label="생성된 이미지",
-                         show_label=True,
-                         elem_id="gallery"
-                     )
-                 gr.Markdown("## 공동체와 함께 나누기")
-                 complete_button = gr.Button("완료")

-         # Floating navigation menu
-         with gr.Row(visible=True) as float_menu:
-             gr.Button("🏠", scale=1)
-             gr.Button("🎵", scale=1)
-             gr.Button("🎤", scale=1)
-             gr.Button("🖼️", scale=1)

-         # Voice analysis function
-         def analyze_voice(audio_file, state):
-             try:
-                 if audio_file is None:
-                     return {"error": "No audio input provided"}, state
-
-                 # Load audio
-                 y, sr = librosa.load(audio_file)

-                 # Emotion analysis
-                 emotions = emotion_analyzer(y)
-                 primary_emotion = emotions[0]

-                 # Speech to text
-                 text_result = speech_recognizer(y)
-
-                 # Update state
-                 state["voice_analysis"] = {
-                     "emotion": primary_emotion['label'],
-                     "probability": float(primary_emotion['score']),
-                     "text": text_result['text']
-                 }

-                 return {
-                     "emotion": primary_emotion['label'],
-                     "emotion_probability": f"{primary_emotion['score']:.2f}",
-                     "transcribed_text": text_result['text'],
-                     "status": "Analysis complete"
-                 }, state
-             except Exception as e:
-                 return {"error": str(e), "status": "Error occurred"}, state

-         # Event handlers
-         voice_input.change(
-             fn=analyze_voice,
-             inputs=[voice_input, state],
-             outputs=[emotion_output, state]
-         )

-         # Stage navigation
-         intro_next.click(
-             fn=lambda s: {"stage": "cleansing", **s},
-             inputs=[state],
-             outputs=[state],
-         )
-
-         cleansing_next.click(
-             fn=lambda s: {"stage": "voice", **s},
-             inputs=[state],
-             outputs=[state],
-         )
-
-         voice_next.click(
-             fn=lambda s: {"stage": "sharing", **s},
-             inputs=[state],
-             outputs=[state],
-         )

-     return app

- # Launch the application
  if __name__ == "__main__":
-     app = create_interface()
-     app.launch()
 
 
  import librosa
  from transformers import pipeline
  import json
+ import time
+ from datetime import datetime

+ # Global constants
+ STAGES = {
+     "INTRO": "입장",
+     "CLEANSING": "청신",
+     "PRAYER": "기원",
+     "SHARING": "송신"
+ }
+
+ # Initialize AI models
  speech_recognizer = pipeline("automatic-speech-recognition",
      model="kresnik/wav2vec2-large-xlsr-korean")
+ emotion_classifier = pipeline("audio-classification",
+     model="MIT/ast-finetuned-speech-commands-v2")
+ text_analyzer = pipeline("sentiment-analysis",
+     model="nlptown/bert-base-multilingual-uncased-sentiment")

+ class DigitalGutApp:
+     def __init__(self):
+         self.current_stage = "INTRO"
+         self.user_name = ""
+         self.session_data = {
+             "reflections": [],
+             "voice_analysis": None,
+             "generated_prompts": [],
+             "current_location": "온천장역"
+         }
+
+     def create_interface(self):
+         with gr.Blocks(theme=gr.themes.Soft()) as app:
+             # State management
+             state = gr.State(self.session_data)
+             current_stage = gr.State(self.current_stage)
+
+             # Header
+             with gr.Column(visible=True) as header:
+                 gr.Markdown("# 디지털 굿판")
+                 stage_indicator = gr.Markdown(self._get_stage_description())
+
+             # Main content area
+             with gr.Column() as main_content:
+                 # 1. Intro screen (gr.Column's visible= takes a bool, not a
+                 # callable; each screen starts from the initial stage and is
+                 # toggled later through _update_visibility)
+                 with gr.Column(visible=self.current_stage == "INTRO") as intro_screen:
+                     gr.Markdown("""
+                     # 디지털 굿판에 오신 것을 환영합니다
+                     온천천의 디지털 치유 공간으로 들어가보세요.
+                     """)
+                     name_input = gr.Textbox(label="이름을 알려주세요")
+                     start_button = gr.Button("여정 시작하기")
+
+                 # 2. Cleansing screen (music listening)
+                 with gr.Column(visible=self.current_stage == "CLEANSING") as cleansing_screen:
+                     with gr.Row():
+                         # Music player
+                         audio_player = gr.Audio(
+                             value="assets/main_music.mp3",
+                             type="filepath",
+                             label="온천천의 소리"
                          )
+                         # Reflection input
+                         with gr.Column():
+                             reflection_input = gr.Textbox(
+                                 label="현재 순간의 감상을 적어주세요",
+                                 lines=3
+                             )
+                             save_reflection_btn = gr.Button("감상 저장")
+                             reflections_display = gr.Dataframe(
+                                 headers=["시간", "감상", "감정"],
+                                 label="기록된 감상들"
+                             )
+
+                 # 3. Prayer screen (voice analysis)
+                 with gr.Column(visible=self.current_stage == "PRAYER") as prayer_screen:
+                     with gr.Row():
+                         # Voice input
+                         voice_input = gr.Audio(
+                             label="나누고 싶은 이야기를 들려주세요",
+                             sources=["microphone"],
+                             type="filepath"
                          )
+                         # Analysis results
+                         analysis_output = gr.JSON(label="분석 결과")
+
+                 # 4. Sharing screen (sharing results)
+                 with gr.Column(visible=self.current_stage == "SHARING") as sharing_screen:
+                     final_prompt = gr.Textbox(label="생성된 프롬프트")
+                     gallery = gr.Gallery(label="시각화 결과")
+
+             # Floating menu (buttons are placeholders; no click handlers yet)
+             with gr.Column(visible=True) as floating_menu:
+                 gr.Button("🏠", scale=1)
+                 gr.Button("🎵", scale=1)
+                 gr.Button("🎤", scale=1)
+                 gr.Button("🖼️", scale=1)
+
+             # Event handlers
+             def start_journey(name):
+                 self.user_name = name
+                 self.current_stage = "CLEANSING"
+                 return self._update_visibility()
+
+             def save_reflection(text, state):
+                 if not text.strip():
+                     return state, gr.update()
+
+                 current_time = datetime.now().strftime("%H:%M:%S")
+                 sentiment = text_analyzer(text)[0]
+                 new_reflection = [current_time, text, sentiment["label"]]
+
+                 state["reflections"].append(new_reflection)
+                 return state, state["reflections"]
+
+             def analyze_voice(audio, state):
+                 if audio is None:
+                     # keep the output arity consistent: (analysis, state)
+                     return {"error": "음성 입력이 없습니다."}, state
+
+                 result = self._comprehensive_voice_analysis(audio)
+                 state["voice_analysis"] = result
+                 return result, state
+
+             # Wire up events
+             start_button.click(
+                 fn=start_journey,
+                 inputs=[name_input],
+                 outputs=[intro_screen, cleansing_screen, prayer_screen, sharing_screen]
+             )
+
+             save_reflection_btn.click(
+                 fn=save_reflection,
+                 inputs=[reflection_input, state],
+                 outputs=[state, reflections_display]
+             )
+
+             voice_input.change(
+                 fn=analyze_voice,
+                 inputs=[voice_input, state],
+                 outputs=[analysis_output, state]
+             )
+
+         return app
+
+     def _comprehensive_voice_analysis(self, audio_path):
+         """Run the full voice-analysis chain."""
+         try:
+             # Load at 16 kHz, the rate the wav2vec2/AST pipelines expect
+             y, sr = librosa.load(audio_path, sr=16000)
+
+             # 1. Acoustic feature analysis
+             acoustic_features = {
+                 "energy": float(np.mean(librosa.feature.rms(y=y))),
+                 # NOTE: librosa.pitch_tuning expects frequencies, so on raw
+                 # audio this is only a rough tuning-offset proxy, not a pitch
+                 "pitch_mean": float(np.mean(librosa.pitch_tuning(y))),
+                 "tempo": float(librosa.beat.tempo(y=y, sr=sr)[0]),
+                 "mfcc": librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).mean(axis=1).tolist()
+             }
+
+             # 2. Speech emotion analysis
+             emotion_result = emotion_classifier(y)
+
+             # 3. Speech-to-text
+             text_result = speech_recognizer(y)
+
+             # 4. Text sentiment analysis
+             text_sentiment = text_analyzer(text_result["text"])[0]
+
+             return {
+                 "acoustic_analysis": acoustic_features,
+                 "emotion": emotion_result[0],
+                 "transcription": text_result["text"],
+                 "text_sentiment": text_sentiment
+             }
+
+         except Exception as e:
+             return {"error": str(e)}
+
+     def _get_stage_description(self):
+         """Return the description of the current stage."""
+         descriptions = {
+             "INTRO": "디지털 굿판에 오신 것을 환영합니다",
+             "CLEANSING": "청신 - 소리로 정화하기",
+             "PRAYER": "기원 - 목소리로 전하기",
+             "SHARING": "송신 - 함께 나누기"
+         }
+         return descriptions.get(self.current_stage, "")
+
+     def _update_visibility(self):
+         """Update screen visibility for the current stage."""
+         # start_journey wires this to the four stage Columns, so return one
+         # gr.update per output component; Gradio cannot apply a dict keyed
+         # by component-name strings
+         return (
+             gr.update(visible=self.current_stage == "INTRO"),
+             gr.update(visible=self.current_stage == "CLEANSING"),
+             gr.update(visible=self.current_stage == "PRAYER"),
+             gr.update(visible=self.current_stage == "SHARING"),
+         )
+
+ # Run the app
  if __name__ == "__main__":
+     app = DigitalGutApp()
+     interface = app.create_interface()
+     interface.launch()
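
A minimal smoke test for the new analysis path (a sketch, not part of this commit): synthesize a short tone, write it to disk, and run it through _comprehensive_voice_analysis. It assumes the soundfile package is installed, the module is importable as app.py, and the three pipeline models have downloaded.

import numpy as np
import soundfile as sf

from app import DigitalGutApp  # importing app.py also loads the three pipelines

# Write a 2-second, 220 Hz test tone at the 16 kHz rate the models expect.
sr = 16000
t = np.linspace(0, 2, 2 * sr, endpoint=False)
sf.write("test_tone.wav", 0.1 * np.sin(2 * np.pi * 220 * t), sr)

# Run the full analysis chain and print either the error or the result keys.
result = DigitalGutApp()._comprehensive_voice_analysis("test_tone.wav")
print(result.get("error") or sorted(result))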