haepada committed
Commit 9f7512d · verified · 1 parent: 403557d

Update app.py

Files changed (1)
  1. app.py +138 -43
app.py CHANGED
@@ -2,56 +2,151 @@ import gradio as gr
  import numpy as np
  import librosa
  from transformers import pipeline
+ import json

- # Initialize models
+ # Initialize AI models
  emotion_analyzer = pipeline("audio-classification", model="MIT/ast-finetuned-speech-commands-v2")
  speech_recognizer = pipeline("automatic-speech-recognition",
                               model="kresnik/wav2vec2-large-xlsr-korean")

- def analyze_voice(audio_file):
-     """Voice analysis function"""
-     try:
-         # Load audio
-         y, sr = librosa.load(audio_file)
-
-         # 1. Voice emotion analysis
-         emotions = emotion_analyzer(y)
-         primary_emotion = emotions[0]
-
-         # 2. Speech to text
-         text_result = speech_recognizer(y)
-
-         # 3. Extract audio features
-         mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
-         energy = np.mean(librosa.feature.rms(y=y))
-
-         return {
-             "emotion": primary_emotion['label'],
-             "emotion_probability": f"{primary_emotion['score']:.2f}",
-             "transcribed_text": text_result['text'],
-             "energy_level": f"{energy:.2f}",
-             "status": "Analysis complete"
-         }
-     except Exception as e:
-         return {
-             "error": str(e),
-             "status": "Error occurred"
-         }
-
- # Create Gradio interface
- interface = gr.Interface(
-     fn=analyze_voice,
-     inputs=gr.Audio(
-         label="Voice Input",
-         sources=["microphone", "upload"],
-         type="filepath"
-     ),
-     outputs=gr.JSON(label="Analysis Results"),
-     title="Digital Gut - Voice Emotion Analysis",
-     description="Performs emotion analysis and text conversion from voice input.",
-     theme=gr.themes.Soft()
- )
-
- # Launch app
+ # Global state management
+ current_stage = "intro"
+ session_data = {}
+
+ def create_interface():
+     with gr.Blocks(theme=gr.themes.Soft()) as app:
+         # State management
+         state = gr.State(value={"stage": "intro", "session_data": {}})
+
+         # Header
+         gr.Markdown("# 디지털 굿판")
+
+         # Navigation tabs
+         with gr.Tabs() as tabs:
+             # Intro / worldview (세계관) stage
+             with gr.Tab("입장", id="intro"):
+                 gr.Markdown("""
+                 # 디지털 굿판에 오신 것을 환영합니다
+                 온천천의 디지털 치유 공간으로 들어가보세요.
+                 """)
+                 intro_next = gr.Button("여정 시작하기")
+
+             # 청신 stage (Sound Purification)
+             with gr.Tab("청신", id="cleansing", visible=False):
+                 with gr.Row():
+                     audio_player = gr.Audio(
+                         value="path_to_default_sound.mp3",  # default sound file
+                         type="filepath",
+                         label="온천천의 소리"
+                     )
+                     location_info = gr.Textbox(
+                         label="현재 위치",
+                         value="온천장역",
+                         interactive=False
+                     )
+                 cleansing_next = gr.Button("다음 단계로")
+
+             # 기원 stage (Voice Analysis)
+             with gr.Tab("기원", id="voice", visible=False):
+                 with gr.Row():
+                     # Voice input component
+                     voice_input = gr.Audio(
+                         label="목소리로 전하기",
+                         sources=["microphone", "upload"],
+                         type="filepath"
+                     )
+
+                     # Analysis results
+                     with gr.Column():
+                         emotion_output = gr.JSON(
+                             label="감정 분석 결과",
+                             visible=True
+                         )
+                         text_output = gr.Textbox(
+                             label="음성 텍스트",
+                             visible=True
+                         )
+                 voice_next = gr.Button("다음 단계로")
+
+             # 송신 stage (Sharing)
+             with gr.Tab("송신", id="sharing", visible=False):
+                 with gr.Row():
+                     gr.Gallery(
+                         label="생성된 이미지",
+                         show_label=True,
+                         elem_id="gallery"
+                     )
+                 gr.Markdown("## 공동체와 함께 나누기")
+                 complete_button = gr.Button("완료")
+
+         # Floating navigation menu
+         with gr.Row(visible=True) as float_menu:
+             gr.Button("🏠", scale=1)
+             gr.Button("🎵", scale=1)
+             gr.Button("🎤", scale=1)
+             gr.Button("🖼️", scale=1)
+
+         # Voice analysis function
+         def analyze_voice(audio_file, state):
+             try:
+                 if audio_file is None:
+                     return {"error": "No audio input provided"}, state
+
+                 # Load audio
+                 y, sr = librosa.load(audio_file)
+
+                 # Emotion analysis
+                 emotions = emotion_analyzer(y)
+                 primary_emotion = emotions[0]
+
+                 # Speech to text
+                 text_result = speech_recognizer(y)
+
+                 # Update state
+                 state["voice_analysis"] = {
+                     "emotion": primary_emotion['label'],
+                     "probability": float(primary_emotion['score']),
+                     "text": text_result['text']
+                 }
+
+                 return {
+                     "emotion": primary_emotion['label'],
+                     "emotion_probability": f"{primary_emotion['score']:.2f}",
+                     "transcribed_text": text_result['text'],
+                     "status": "Analysis complete"
+                 }, state
+             except Exception as e:
+                 return {"error": str(e), "status": "Error occurred"}, state
+
+         # Event handlers
+         voice_input.change(
+             fn=analyze_voice,
+             inputs=[voice_input, state],
+             outputs=[emotion_output, state]
+         )
+
+         # Stage navigation
+         intro_next.click(
+             fn=lambda s: {"stage": "cleansing", **s},
+             inputs=[state],
+             outputs=[state],
+         )
+
+         cleansing_next.click(
+             fn=lambda s: {"stage": "voice", **s},
+             inputs=[state],
+             outputs=[state],
+         )
+
+         voice_next.click(
+             fn=lambda s: {"stage": "sharing", **s},
+             inputs=[state],
+             outputs=[state],
+         )
+
+     return app
+
+ # Launch the application
  if __name__ == "__main__":
-     interface.launch()
+     app = create_interface()
+     app.launch()
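
Note on analyze_voice(): librosa.load() resamples to 22,050 Hz by default, while both MIT/ast-finetuned-speech-commands-v2 and kresnik/wav2vec2-large-xlsr-korean expect 16 kHz input, and a bare NumPy array passed to a transformers pipeline carries no sampling-rate information. A minimal sketch of one way to keep the rates consistent (not part of the commit; the dict input form is an assumption about the audio pipelines, the file-path form is the simpler fallback):

    # Sketch only: resample on load so the array matches the models' 16 kHz rate
    y, sr = librosa.load(audio_file, sr=16000)
    emotions = emotion_analyzer({"raw": y, "sampling_rate": sr})       # dict form carries the rate
    text_result = speech_recognizer({"raw": y, "sampling_rate": sr})
    # Or simply pass the file path and let the pipelines decode/resample:
    # emotions = emotion_analyzer(audio_file)
    # text_result = speech_recognizer(audio_file)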
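
Note on the stage-navigation handlers: in a Python dict literal the later keys win, so lambda s: {"stage": "cleansing", **s} lets the old s["stage"] value overwrite "cleansing" and the stage never advances; the handlers also only update the gr.State value, so the tabs created with visible=False are never revealed without a separate component update (whose syntax depends on the Gradio version). A minimal, version-independent sketch of a merge where the new value wins, using a hypothetical helper not present in the commit:

    def advance_stage(new_stage):
        # Returns a click handler that merges the new stage *after* the old state,
        # so the new value overwrites the old one.
        def handler(s):
            return {**s, "stage": new_stage}
        return handler

    intro_next.click(fn=advance_stage("cleansing"), inputs=[state], outputs=[state])
    cleansing_next.click(fn=advance_stage("voice"), inputs=[state], outputs=[state])
    voice_next.click(fn=advance_stage("sharing"), inputs=[state], outputs=[state])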