Update app.py
app.py CHANGED
@@ -24,14 +24,6 @@ text_analyzer = pipeline(
     model="nlptown/bert-base-multilingual-uncased-sentiment"
 )
 
-def create_interface():
-    with gr.Blocks(theme=gr.themes.Soft()) as app:
-        state = gr.State({
-            "user_name": "",
-            "reflections": [],
-            "voice_analysis": None,
-            "final_prompt": ""
-        })
 def generate_image_from_prompt(prompt):
     """Generate an image via the HuggingFace Inference API"""
     try:
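Note: the diff elides the request itself; the next hunk resumes at the closing `})` and the status check. For a text-to-image model on the hosted Inference API, the elided call presumably looks like the sketch below. The endpoint, header, and payload shape are assumptions, not taken from this commit:

    import requests

    API_URL = "https://api-inference.huggingface.co/models/<model-id>"  # hypothetical model id
    HEADERS = {"Authorization": "Bearer <hf-token>"}                    # hypothetical token

    def generate_image_from_prompt(prompt):
        # Hosted text-to-image models take {"inputs": prompt} and answer
        # with raw image bytes on success.
        response = requests.post(API_URL, headers=HEADERS, json={"inputs": prompt})
        if response.status_code == 200:
            return response.content  # raw image bytes
        return None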
@@ -45,14 +37,21 @@ def generate_image_from_prompt(prompt):
         })
 
         if response.status_code == 200:
-        return response.content
+            return response.content
         else:
             return None
     except Exception as e:
         print(f"Error generating image: {e}")
         return None
 
-
+def create_interface():
+    with gr.Blocks(theme=gr.themes.Soft()) as app:
+        state = gr.State({
+            "user_name": "",
+            "reflections": [],
+            "voice_analysis": None,
+            "final_prompt": ""
+        })
 
         # Header
         header = gr.Markdown("# 디지털 굿판")
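Note: the gr.State dict added here holds one per-session value; Gradio handlers that touch it receive the current dict as an input and must return the updated dict as an output. A minimal sketch of that convention (update_name and name_box are hypothetical, not part of this commit):

    def update_name(name, state):
        state["user_name"] = name  # update the per-session dict
        return state               # return it so Gradio stores the new value

    # wired as: name_box.change(fn=update_name, inputs=[name_box, state], outputs=[state])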
@@ -68,7 +67,6 @@ def generate_image_from_prompt(prompt):
         # Cheongsin
         with gr.Tab("청신"):
             with gr.Row():
-                # changed to an absolute path
                 audio_path = os.path.abspath(os.path.join("assets", "main_music.mp3"))
                 audio = gr.Audio(
                     value=audio_path,
@@ -116,26 +114,20 @@ def generate_image_from_prompt(prompt):
             )
             analyze_btn = gr.Button("분석하기")
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            )
-            generate_btn = gr.Button("이미지 생성하기")
-            result_image = gr.Image(
-                label="생성된 이미지",
-                type="pil"
-            )
+        # Songsin
+        with gr.Tab("송신"):
+            gr.Markdown("## 송신 - 시각화 결과")
+            with gr.Column():
+                final_prompt = gr.Textbox(
+                    label="생성된 프롬프트",
+                    interactive=False,
+                    lines=3
+                )
+                generate_btn = gr.Button("이미지 생성하기")
+                result_image = gr.Image(
+                    label="생성된 이미지",
+                    type="pil"
+                )
 
     def clear_voice_input():
         """Reset the voice input"""
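Note: result_image is declared with type="pil", but generate_image_from_prompt returns raw response.content bytes, which gr.Image does not render directly. A decode step is presumably needed between the two; a minimal sketch (bytes_to_pil is a hypothetical helper):

    import io
    from PIL import Image

    def bytes_to_pil(image_bytes):
        """Decode raw image bytes (e.g. response.content) into a PIL image."""
        if image_bytes is None:
            return None
        return Image.open(io.BytesIO(image_bytes))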
@@ -147,18 +139,11 @@ with gr.Tab("송신"):
         return state, "음성을 먼저 녹음해주세요.", "", "", ""
 
     try:
-        # load the audio
         y, sr = librosa.load(audio_path, sr=16000)
-
-        # speech recognition
         transcription = speech_recognizer(y)
         text = transcription["text"]
-
-        # emotion analysis
         voice_emotions = emotion_classifier(y)
         text_sentiment = text_analyzer(text)[0]
-
-        # prompt generation
         prompt = generate_prompt(text, voice_emotions[0], text_sentiment)
 
         return (
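Note: this path feeds the raw 16 kHz mono array from librosa.load straight into the transformers pipelines, which accept a plain float array and (for Whisper-style ASR) return {"text": ...}. A standalone sketch of the same flow under those assumptions (analyze_clip is hypothetical):

    import librosa

    def analyze_clip(audio_path, speech_recognizer, text_analyzer):
        y, sr = librosa.load(audio_path, sr=16000)  # resample to 16 kHz mono float32
        text = speech_recognizer(y)["text"]         # ASR pipeline accepts the raw array
        sentiment = text_analyzer(text)[0]          # top sentiment label for the transcript
        return text, sentiment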
@@ -227,7 +212,13 @@ with gr.Tab("송신"):
             outputs=[state, transcribed_text, voice_emotion, text_emotion, final_prompt]
         )
 
-
+        generate_btn.click(
+            fn=generate_image_from_prompt,
+            inputs=[final_prompt],
+            outputs=[result_image]
+        )
+
+    return app
 
 if __name__ == "__main__":
     demo = create_interface()
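Note: as wired above, whatever generate_image_from_prompt returns is passed straight to result_image. If the function still returns raw bytes, a wrapper such as the bytes_to_pil sketch earlier would presumably sit in between, e.g.:

    generate_btn.click(
        fn=lambda p: bytes_to_pil(generate_image_from_prompt(p)),
        inputs=[final_prompt],
        outputs=[result_image],
    )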