IT2091024v2

Paused

App Files Files Community

Pijush2023 committed on Jun 17, 2024

Commit

8ea3b6d

verified ·

1 Parent(s): f302d37

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -13

app.py CHANGED Viewed

@@ -417,6 +417,47 @@ def get_weather_icon(condition):
     }
     return condition_map.get(condition, "c01d")
 def update_map_with_response(history):
     if not history:
         return ""
@@ -482,19 +523,7 @@ hardcoded_prompt = "A cat holding a sign that says hello world"
 # Gradio Blocks interface
 with gr.Blocks(theme='rawrsor1/Everforest') as demo:
     with gr.Row():
-        with gr.Column():
-            gr.HTML('''
-                <div style="animation: fadeIn 2s ease-in-out infinite alternate;">
-                    <h1 style="font-size: 4em; text-align: center; color: #4CAF50;">Welcome to Omaha Events</h1>
-                </div>
-                <style>
-                    @keyframes fadeIn {
-                        from { opacity: 0; }
-                        to { opacity: 1; }
-                    }
-                </style>
-            ''')
-            chatbot = gr.Chatbot([], elem_id="chatbot", bubble_full_width=False)
         with gr.Column():
             weather_output = gr.HTML(value=fetch_local_weather())

     }
     return condition_map.get(condition, "c01d")
+# Voice Control
+import numpy as np
+import torch
+from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
+model_id = 'openai/whisper-large-v3'  # model identifier passed to both from_pretrained() calls below
+device = "cuda:0" if torch.cuda.is_available() else "cpu"  # first CUDA device when available, otherwise CPU
+torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32  # half precision only when CUDA is available
+model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype,
+                                                  #low_cpu_mem_usage=True,
+                                                  use_safetensors=True).to(device)
+processor = AutoProcessor.from_pretrained(model_id)  # supplies the tokenizer and feature extractor used by the pipeline
+# ASR pipeline built at import time from the model and processor above; configured with return_timestamps=True, 15-second chunks and batch size 16.
+pipe_asr = pipeline("automatic-speech-recognition", model=model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor, max_new_tokens=128, chunk_length_s=15, batch_size=16, torch_dtype=torch_dtype, device=device, return_timestamps=True)
+base_audio_drive = "/data/audio"  # NOTE(review): not referenced in the visible code; confirm where this path is used
+import numpy as np
+def transcribe_function(stream, new_chunk):
+    try:
+        sr, y = new_chunk[0], new_chunk[1]
+    except TypeError:
+        print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
+        return stream, "", None
+    y = y.astype(np.float32) / np.max(np.abs(y))
+    if stream is not None:
+        stream = np.concatenate([stream, y])
+    else:
+        stream = y
+    result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
+    full_text = result.get("text", "")
+    return stream, full_text, result
 def update_map_with_response(history):
     if not history:
         return ""
 # Gradio Blocks interface
 with gr.Blocks(theme='rawrsor1/Everforest') as demo:
     with gr.Row():
+        chatbot = gr.Chatbot([], elem_id="chatbot", bubble_full_width=False)
         with gr.Column():
             weather_output = gr.HTML(value=fetch_local_weather())