Update app.py
app.py CHANGED
@@ -2,56 +2,151 @@ import gradio as gr
 import numpy as np
 import librosa
 from transformers import pipeline
+import json
 
-# Initialize models
+# Initialize AI models
 emotion_analyzer = pipeline("audio-classification", model="MIT/ast-finetuned-speech-commands-v2")
 speech_recognizer = pipeline("automatic-speech-recognition",
                              model="kresnik/wav2vec2-large-xlsr-korean")
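Note: MIT/ast-finetuned-speech-commands-v2 is an Audio Spectrogram Transformer fine-tuned on the Speech Commands v2 keyword-spotting dataset, so despite the variable name its labels are short command words ("yes", "no", "stop", ...), not emotion categories. A checkpoint's actual label set can be checked before relying on it:

    # Print the classifier's label map (id2label is standard transformers model config)
    print(emotion_analyzer.model.config.id2label)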
 
-def analyze_voice(audio_file):
-    try:
-        # Load audio
-        y, sr = librosa.load(audio_file)
-
-        # Emotion analysis
-        emotions = emotion_analyzer(y)
-        primary_emotion = emotions[0]
-
-        # Speech to text
-        text_result = speech_recognizer(y)
-
-        return {
-            "emotion": primary_emotion['label'],
-            "emotion_probability": f"{primary_emotion['score']:.2f}",
-            "transcribed_text": text_result['text'],
-            "status": "Analysis complete"
-        }
-    except Exception as e:
-        return {
-            "error": str(e),
-            "status": "Error occurred"
-        }
-
-# Create Gradio interface
-interface = gr.Interface(
-    fn=analyze_voice,
-    inputs=gr.Audio(
-        label="Voice Input",
-        sources=["microphone", "upload"],
-        type="filepath"
-    ),
-    outputs=gr.JSON(label="Analysis Results"),
-    title="Digital Gut - Voice Emotion Analysis",
-    description="Performs emotion analysis and text conversion from voice input.",
-    theme=gr.themes.Soft()
-)
-
-# Launch app
+# Global state management
+current_stage = "intro"
+session_data = {}
+
+def create_interface():
+    with gr.Blocks(theme=gr.themes.Soft()) as app:
+        # State management
+        state = gr.State(value={"stage": "intro", "session_data": {}})
+
+        # Header
+        gr.Markdown("# 디지털 굿판")
+
+        # Navigation tabs
+        with gr.Tabs() as tabs:
+            # Intro/세계관 (worldview) stage
+            with gr.Tab("입장", id="intro"):
+                gr.Markdown("""
+                # 디지털 굿판에 오신 것을 환영합니다
+                온천천의 디지털 치유 공간으로 들어가보세요.
+                """)
+                intro_next = gr.Button("여정 시작하기")
+
+            # 청신 Stage (Sound Purification)
+            with gr.Tab("청신", id="cleansing", visible=False):
+                with gr.Row():
+                    audio_player = gr.Audio(
+                        value="path_to_default_sound.mp3",  # default sound file
+                        type="filepath",
+                        label="온천천의 소리"
+                    )
+                    location_info = gr.Textbox(
+                        label="현재 위치",
+                        value="온천장역",
+                        interactive=False
+                    )
+                cleansing_next = gr.Button("다음 단계로")
+
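Note: "path_to_default_sound.mp3" reads as a placeholder. gr.Audio(value=...) expects a real file path or URL at launch, so the 청신 tab's player will fail to load until an actual audio asset is bundled with the Space.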
+            # 기원 Stage (Voice Analysis)
+            with gr.Tab("기원", id="voice", visible=False):
+                with gr.Row():
+                    # Voice input component
+                    voice_input = gr.Audio(
+                        label="목소리로 전하기",
+                        sources=["microphone", "upload"],
+                        type="filepath"
+                    )
+
+                    # Analysis results
+                    with gr.Column():
+                        emotion_output = gr.JSON(
+                            label="감정 분석 결과",
+                            visible=True
+                        )
+                        text_output = gr.Textbox(
+                            label="음성 텍스트",
+                            visible=True
+                        )
+                voice_next = gr.Button("다음 단계로")
+
+            # 송신 Stage (Sharing)
+            with gr.Tab("송신", id="sharing", visible=False):
+                with gr.Row():
+                    gr.Gallery(
+                        label="생성된 이미지",
+                        show_label=True,
+                        elem_id="gallery"
+                    )
+                gr.Markdown("## 공동체와 함께 나누기")
+                complete_button = gr.Button("완료")
+
82 |
+
# Floating navigation menu
|
83 |
+
with gr.Row(visible=True) as float_menu:
|
84 |
+
gr.Button("🏠", scale=1)
|
85 |
+
gr.Button("🎵", scale=1)
|
86 |
+
gr.Button("🎤", scale=1)
|
87 |
+
gr.Button("🖼️", scale=1)
|
88 |
+
|
+        # Voice analysis function
+        def analyze_voice(audio_file, state):
+            try:
+                if audio_file is None:
+                    return {"error": "No audio input provided"}, state
+
+                # Load audio at 16 kHz, the rate both models expect
+                y, sr = librosa.load(audio_file, sr=16000)
+
+                # Emotion analysis
+                emotions = emotion_analyzer(y)
+                primary_emotion = emotions[0]
+
+                # Speech to text
+                text_result = speech_recognizer(y)
+
+                # Update state
+                state["voice_analysis"] = {
+                    "emotion": primary_emotion['label'],
+                    "probability": float(primary_emotion['score']),
+                    "text": text_result['text']
+                }
+
+                return {
+                    "emotion": primary_emotion['label'],
+                    "emotion_probability": f"{primary_emotion['score']:.2f}",
+                    "transcribed_text": text_result['text'],
+                    "status": "Analysis complete"
+                }, state
+            except Exception as e:
+                return {"error": str(e), "status": "Error occurred"}, state
+
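Note: both checkpoints are 16 kHz models, which is why the load call pins sr=16000; librosa.load defaults to 22050 Hz, and a raw numpy array handed to a transformers pipeline is assumed to already be at the feature extractor's sampling rate. An alternative (assuming ffmpeg is available in the Space) is to pass the file path straight to the pipelines and let them decode and resample:

    # Equivalent, with decoding/resampling delegated to the pipeline
    emotions = emotion_analyzer(audio_file)
    text_result = speech_recognizer(audio_file)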
+        # Event handlers
+        voice_input.change(
+            fn=analyze_voice,
+            inputs=[voice_input, state],
+            outputs=[emotion_output, state]
+        )
+
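Note: as wired, the transcription only reaches the UI inside the JSON payload; text_output is never the target of any handler, so it stays empty. If the textbox is meant to display the transcribed text, a thin wrapper returning a third value would do it (a sketch reusing the component names above, registered in place of the voice_input.change call):

    # Sketch: also surface the transcription in text_output
    def analyze_voice_with_text(audio_file, state):
        payload, state = analyze_voice(audio_file, state)
        return payload, payload.get("transcribed_text", ""), state

    voice_input.change(
        fn=analyze_voice_with_text,
        inputs=[voice_input, state],
        outputs=[emotion_output, text_output, state],
    )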
+        # Stage navigation
+        intro_next.click(
+            fn=lambda s: {**s, "stage": "cleansing"},
+            inputs=[state],
+            outputs=[state],
+        )
+
+        cleansing_next.click(
+            fn=lambda s: {**s, "stage": "voice"},
+            inputs=[state],
+            outputs=[state],
+        )
+
+        voice_next.click(
+            fn=lambda s: {**s, "stage": "sharing"},
+            inputs=[state],
+            outputs=[state],
+        )
+
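Two notes on this navigation block. First, dict-literal merge order matters: {**s, "stage": "cleansing"} lets the new stage override the value already in s, whereas the reverse order would silently keep the stale stage. Second, these handlers only rewrite state; nothing un-hides the tabs created with visible=False or switches to them, so clicking the buttons has no visible effect yet. One way to drive the tabs from the same clicks, a sketch assuming Gradio 4.x update semantics and that each tab is bound to a name (e.g. with gr.Tab("청신", id="cleansing", visible=False) as cleansing_tab):

    # Sketch: un-hide the target tab and select it, in addition to updating state
    def enter_cleansing(s):
        return {**s, "stage": "cleansing"}, gr.Tab(visible=True), gr.Tabs(selected="cleansing")

    intro_next.click(
        fn=enter_cleansing,
        inputs=[state],
        outputs=[state, cleansing_tab, tabs],
    )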
+        return app
+
+# Launch the application
 if __name__ == "__main__":
-    interface.launch()
+    app = create_interface()
+    app.launch()
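For a quick smoke test of the model side without launching the UI (sample.wav is a hypothetical local file):

    # Exercise the two pipelines directly
    import librosa
    from transformers import pipeline

    clf = pipeline("audio-classification", model="MIT/ast-finetuned-speech-commands-v2")
    asr = pipeline("automatic-speech-recognition", model="kresnik/wav2vec2-large-xlsr-korean")

    y, _ = librosa.load("sample.wav", sr=16000)
    print(clf(y)[0])       # top label and score
    print(asr(y)["text"])  # Korean transcription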