haepada committed
Commit 9f7512d · verified · 1 parent: 403557d

Update app.py

Files changed (1)
  1. app.py +138 -43
app.py CHANGED
@@ -2,56 +2,151 @@ import gradio as gr
  import numpy as np
  import librosa
  from transformers import pipeline
+ import json

- # Initialize models
+ # Initialize AI models
  emotion_analyzer = pipeline("audio-classification", model="MIT/ast-finetuned-speech-commands-v2")
  speech_recognizer = pipeline("automatic-speech-recognition",
                               model="kresnik/wav2vec2-large-xlsr-korean")

- def analyze_voice(audio_file):
-     """Voice analysis function"""
-     try:
-         # Load audio
-         y, sr = librosa.load(audio_file)
-
-         # 1. Voice emotion analysis
-         emotions = emotion_analyzer(y)
-         primary_emotion = emotions[0]
-
-         # 2. Speech to text
-         text_result = speech_recognizer(y)
-
-         # 3. Extract audio features
-         mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
-         energy = np.mean(librosa.feature.rms(y=y))
-
-         return {
-             "emotion": primary_emotion['label'],
-             "emotion_probability": f"{primary_emotion['score']:.2f}",
-             "transcribed_text": text_result['text'],
-             "energy_level": f"{energy:.2f}",
-             "status": "Analysis complete"
-         }
-     except Exception as e:
-         return {
-             "error": str(e),
-             "status": "Error occurred"
-         }
-
- # Create Gradio interface
- interface = gr.Interface(
-     fn=analyze_voice,
-     inputs=gr.Audio(
-         label="Voice Input",
-         sources=["microphone", "upload"],
-         type="filepath"
-     ),
-     outputs=gr.JSON(label="Analysis Results"),
-     title="Digital Gut - Voice Emotion Analysis",
-     description="Performs emotion analysis and text conversion from voice input.",
-     theme=gr.themes.Soft()
- )
-
- # Launch app
+ # Global state management
+ current_stage = "intro"
+ session_data = {}
+
+ def create_interface():
+     with gr.Blocks(theme=gr.themes.Soft()) as app:
+         # State management
+         state = gr.State(value={"stage": "intro", "session_data": {}})
+
+         # Header
+         gr.Markdown("# 디지털 굿판")
+
+         # Navigation tabs
+         with gr.Tabs() as tabs:
+             # Intro / worldview (세계관) stage
+             with gr.Tab("입장", id="intro"):
+                 gr.Markdown("""
+                 # 디지털 굿판에 오신 것을 환영합니다
+                 온천천의 디지털 치유 공간으로 들어가보세요.
+                 """)
+                 intro_next = gr.Button("여정 시작하기")
+
+             # 청신 stage (Sound Purification)
+             with gr.Tab("청신", id="cleansing", visible=False):
+                 with gr.Row():
+                     audio_player = gr.Audio(
+                         value="path_to_default_sound.mp3",  # default sound file
+                         type="filepath",
+                         label="온천천의 소리"
+                     )
+                     location_info = gr.Textbox(
+                         label="현재 위치",
+                         value="온천장역",
+                         interactive=False
+                     )
+                 cleansing_next = gr.Button("다음 단계로")
+
+             # 기원 stage (Voice Analysis)
+             with gr.Tab("기원", id="voice", visible=False):
+                 with gr.Row():
+                     # Voice input component
+                     voice_input = gr.Audio(
+                         label="목소리로 전하기",
+                         sources=["microphone", "upload"],
+                         type="filepath"
+                     )
+
+                     # Analysis results
+                     with gr.Column():
+                         emotion_output = gr.JSON(
+                             label="감정 분석 결과",
+                             visible=True
+                         )
+                         text_output = gr.Textbox(
+                             label="음성 텍스트",
+                             visible=True
+                         )
+                 voice_next = gr.Button("다음 단계로")
+
+             # 송신 stage (Sharing)
+             with gr.Tab("송신", id="sharing", visible=False):
+                 with gr.Row():
+                     gr.Gallery(
+                         label="생성된 이미지",
+                         show_label=True,
+                         elem_id="gallery"
+                     )
+                 gr.Markdown("## 공동체와 함께 나누기")
+                 complete_button = gr.Button("완료")
+
+         # Floating navigation menu
+         with gr.Row(visible=True) as float_menu:
+             gr.Button("🏠", scale=1)
+             gr.Button("🎵", scale=1)
+             gr.Button("🎤", scale=1)
+             gr.Button("🖼️", scale=1)
+
+         # Voice analysis function
+         def analyze_voice(audio_file, state):
+             try:
+                 if audio_file is None:
+                     return {"error": "No audio input provided"}, state
+
+                 # Load audio
+                 y, sr = librosa.load(audio_file)
+
+                 # Emotion analysis
+                 emotions = emotion_analyzer(y)
+                 primary_emotion = emotions[0]
+
+                 # Speech to text
+                 text_result = speech_recognizer(y)
+
+                 # Update state
+                 state["voice_analysis"] = {
+                     "emotion": primary_emotion['label'],
+                     "probability": float(primary_emotion['score']),
+                     "text": text_result['text']
+                 }
+
+                 return {
+                     "emotion": primary_emotion['label'],
+                     "emotion_probability": f"{primary_emotion['score']:.2f}",
+                     "transcribed_text": text_result['text'],
+                     "status": "Analysis complete"
+                 }, state
+             except Exception as e:
+                 return {"error": str(e), "status": "Error occurred"}, state
+
+         # Event handlers
+         voice_input.change(
+             fn=analyze_voice,
+             inputs=[voice_input, state],
+             outputs=[emotion_output, state]
+         )
+
+         # Stage navigation
+         intro_next.click(
+             fn=lambda s: {"stage": "cleansing", **s},
+             inputs=[state],
+             outputs=[state],
+         )
+
+         cleansing_next.click(
+             fn=lambda s: {"stage": "voice", **s},
+             inputs=[state],
+             outputs=[state],
+         )
+
+         voice_next.click(
+             fn=lambda s: {"stage": "sharing", **s},
+             inputs=[state],
+             outputs=[state],
+         )
+
+     return app
+
+ # Launch the application
  if __name__ == "__main__":
-     interface.launch()
+     app = create_interface()
+     app.launch()
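
Note on analyze_voice(): librosa.load() resamples to 22,050 Hz by default, while both MIT/ast-finetuned-speech-commands-v2 and kresnik/wav2vec2-large-xlsr-korean expect 16 kHz input, and a bare NumPy array passed to a transformers pipeline carries no sampling-rate information. A minimal sketch of one way to keep the rates consistent (not part of the commit; the dict input form is an assumption about the audio pipelines, the file-path form is the simpler fallback):

    # Sketch only: resample on load so the array matches the models' 16 kHz rate
    y, sr = librosa.load(audio_file, sr=16000)
    emotions = emotion_analyzer({"raw": y, "sampling_rate": sr})       # dict form carries the rate
    text_result = speech_recognizer({"raw": y, "sampling_rate": sr})
    # Or simply pass the file path and let the pipelines decode/resample:
    # emotions = emotion_analyzer(audio_file)
    # text_result = speech_recognizer(audio_file)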
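
Note on the stage-navigation handlers: in a Python dict literal the later keys win, so lambda s: {"stage": "cleansing", **s} lets the old s["stage"] value overwrite "cleansing" and the stage never advances; the handlers also only update the gr.State value, so the tabs created with visible=False are never revealed without a separate component update (whose syntax depends on the Gradio version). A minimal, version-independent sketch of a merge where the new value wins, using a hypothetical helper not present in the commit:

    def advance_stage(new_stage):
        # Returns a click handler that merges the new stage *after* the old state,
        # so the new value overwrites the old one.
        def handler(s):
            return {**s, "stage": new_stage}
        return handler

    intro_next.click(fn=advance_stage("cleansing"), inputs=[state], outputs=[state])
    cleansing_next.click(fn=advance_stage("voice"), inputs=[state], outputs=[state])
    voice_next.click(fn=advance_stage("sharing"), inputs=[state], outputs=[state])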