Pijush2023 committed on
Commit
8ea3b6d
·
verified ·
1 Parent(s): f302d37

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -13
app.py CHANGED
@@ -417,6 +417,47 @@ def get_weather_icon(condition):
417
  }
418
  return condition_map.get(condition, "c01d")
419
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
420
  def update_map_with_response(history):
421
  if not history:
422
  return ""
@@ -482,19 +523,7 @@ hardcoded_prompt = "A cat holding a sign that says hello world"
482
  # Gradio Blocks interface
483
  with gr.Blocks(theme='rawrsor1/Everforest') as demo:
484
  with gr.Row():
485
- with gr.Column():
486
- gr.HTML('''
487
- <div style="animation: fadeIn 2s ease-in-out infinite alternate;">
488
- <h1 style="font-size: 4em; text-align: center; color: #4CAF50;">Welcome to Omaha Events</h1>
489
- </div>
490
- <style>
491
- @keyframes fadeIn {
492
- from { opacity: 0; }
493
- to { opacity: 1; }
494
- }
495
- </style>
496
- ''')
497
- chatbot = gr.Chatbot([], elem_id="chatbot", bubble_full_width=False)
498
 
499
  with gr.Column():
500
  weather_output = gr.HTML(value=fetch_local_weather())
 
417
  }
418
  return condition_map.get(condition, "c01d")
419
 
420
+ # Voice Control
421
+ import numpy as np
422
+ import torch
423
+ from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
424
+
425
# Speech-recognition setup: pick the Whisper checkpoint, then run on the first
# CUDA device in half precision when one is available, otherwise on the CPU in
# full precision.
model_id = 'openai/whisper-large-v3'
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# Load the weights from safetensors files and move the model to the device.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    use_safetensors=True,
).to(device)

# The processor supplies both the tokenizer and the feature extractor that
# the pipeline below is wired up with.
processor = AutoProcessor.from_pretrained(model_id)

# Optimized ASR pipeline
pipe_asr = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=15,
    batch_size=16,
    torch_dtype=torch_dtype,
    device=device,
    return_timestamps=True,
)

base_audio_drive = "/data/audio"
437
+
438
+ import numpy as np
439
+
440
def transcribe_function(stream, new_chunk):
    """Append an audio chunk to the running stream and transcribe the result.

    Args:
        stream: Audio accumulated by earlier calls as a NumPy array, or
            ``None`` when nothing has been accumulated yet.
        new_chunk: An indexable ``(sampling_rate, samples)`` pair.
            NOTE(review): presumably produced by a streaming audio input
            with 1-D mono samples -- confirm against the caller.

    Returns:
        A ``(stream, text, result)`` tuple: the updated stream, the
        transcribed text, and the raw output of ``pipe_asr``. When the chunk
        is malformed or contains no samples, ``stream`` is returned
        unchanged together with ``""`` and ``None``.
    """
    try:
        sr, y = new_chunk[0], new_chunk[1]
    except (TypeError, IndexError):
        # Covers None / non-indexable input as well as sequences that are
        # too short to hold both the rate and the samples.
        print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
        return stream, "", None

    # Convert to float before taking absolute values: abs() on the raw
    # integer dtype overflows for the minimum value (e.g. int16 -32768).
    y = np.asarray(y, dtype=np.float32)
    if y.size == 0:
        # Nothing new to transcribe; np.max would raise on an empty array.
        return stream, "", None

    # Peak-normalize, but leave an all-zero (silent) chunk untouched. Dividing
    # by a zero peak would produce NaNs that then stay in the accumulated
    # stream and corrupt every later transcription.
    peak = np.max(np.abs(y))
    if peak > 0:
        y = y / peak

    if stream is not None:
        stream = np.concatenate([stream, y])
    else:
        stream = y

    # The whole accumulated stream is transcribed again on every call.
    result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)

    full_text = result.get("text", "")

    return stream, full_text, result
459
+
460
+
461
  def update_map_with_response(history):
462
  if not history:
463
  return ""
 
523
  # Gradio Blocks interface
524
  with gr.Blocks(theme='rawrsor1/Everforest') as demo:
525
  with gr.Row():
526
+ chatbot = gr.Chatbot([], elem_id="chatbot", bubble_full_width=False)
 
 
 
 
 
 
 
 
 
 
 
 
527
 
528
  with gr.Column():
529
  weather_output = gr.HTML(value=fetch_local_weather())