Pijush2023 committed on
Commit 8b59821 · verified · 1 Parent(s): 584eaab

Update app.py

Files changed (1): app.py +50 -18
app.py CHANGED
@@ -24,6 +24,7 @@ from huggingface_hub import login
 from transformers.models.speecht5.number_normalizer import EnglishNumberNormalizer
 from parler_tts import ParlerTTSForConditionalGeneration
 from transformers import AutoTokenizer, AutoFeatureExtractor, set_seed
+from scipy.io.wavfile import write as write_wav
 from string import punctuation
 
 # Check if the token is already set in the environment variables
@@ -317,7 +318,7 @@ def fetch_local_news():
     api_key = os.environ['SERP_API']
     url = f'https://serpapi.com/search.json?engine=google_news&q=birmingham headline&api_key={api_key}'
     response = requests.get(url)
-    if response.status_code == 200:
+    if response.status_code == 200:
         results = response.json().get("news_results", [])
         news_html = """
         <h2 style="font-family: 'Georgia', serif; color: #ff0000; background-color: #f8f8f8; padding: 10px; border-radius: 10px;">Birmingham Today</h2>
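
Note: with this guard, a failed SERP request silently produces no news section. If stricter handling is ever wanted, a minimal sketch (the helper name fetch_news_json is hypothetical) would surface HTTP errors instead of swallowing them:

    import requests

    def fetch_news_json(url):
        # raise_for_status() raises requests.HTTPError on any 4xx/5xx response
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        return response.json().get("news_results", [])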
@@ -494,21 +495,51 @@ def preprocess(text):
         text = text.replace(abv, separate_abb(abv))
     return text
 
+def chunk_text(text, max_length=250):
+    words = text.split()
+    chunks = []
+    current_chunk = []
+    current_length = 0
+
+    for word in words:
+        if current_length + len(word) + 1 <= max_length:
+            current_chunk.append(word)
+            current_length += len(word) + 1
+        else:
+            chunks.append(' '.join(current_chunk))
+            current_chunk = [word]
+            current_length = len(word) + 1
+
+    if current_chunk:
+        chunks.append(' '.join(current_chunk))
+
+    return chunks
+
 def generate_audio_parler_tts(text):
     description = "Thomas speaks with emphasis and excitement at a moderate pace with high quality."
-    inputs = parler_tokenizer(description, return_tensors="pt").to(device)
-    prompt = parler_tokenizer(preprocess(text), return_tensors="pt").to(device)
+    chunks = chunk_text(preprocess(text))
+    audio_paths = []
+
+    for chunk in chunks:
+        inputs = parler_tokenizer(description, return_tensors="pt").to(device)
+        prompt = parler_tokenizer(chunk, return_tensors="pt").to(device)
 
-    set_seed(SEED)
-    generation = parler_model.generate(input_ids=inputs.input_ids, prompt_input_ids=prompt.input_ids)
-    audio_arr = generation.cpu().numpy().squeeze()
+        set_seed(SEED)
+        generation = parler_model.generate(input_ids=inputs.input_ids, prompt_input_ids=prompt.input_ids)
+        audio_arr = generation.cpu().numpy().squeeze()
 
-    temp_audio_path = os.path.join(tempfile.gettempdir(), "parler_tts_audio.wav")
-    from scipy.io.wavfile import write as write_wav
-    write_wav(temp_audio_path, SAMPLE_RATE, audio_arr)
+        temp_audio_path = os.path.join(tempfile.gettempdir(), f"parler_tts_audio_{len(audio_paths)}.wav")
+        write_wav(temp_audio_path, SAMPLE_RATE, audio_arr)
+        audio_paths.append(temp_audio_path)
 
-    logging.debug(f"Audio saved to {temp_audio_path}")
-    return temp_audio_path
+    combined_audio_path = os.path.join(tempfile.gettempdir(), "parler_tts_combined_audio.wav")
+    with open(combined_audio_path, "wb") as f:
+        for path in audio_paths:
+            with open(path, "rb") as part_f:
+                f.write(part_f.read())
 
+    logging.debug(f"Audio saved to {combined_audio_path}")
+    return combined_audio_path
 
 pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2", torch_dtype=torch.float16)
 pipe.to(device)
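
Note: chunk_text packs whole words greedily until a chunk would exceed max_length characters (counting one space per word), so words are never split. A quick illustrative check, assuming the function exactly as committed above:

    chunks = chunk_text("one two three", max_length=8)
    # 'one two' occupies 7 characters; appending ' three' would exceed 8,
    # so the result is ['one two', 'three']
    print(chunks)

A caveat on the combining step: concatenating the per-chunk WAV files byte-for-byte leaves the RIFF header of every chunk after the first embedded in the audio stream, so some decoders will stop at the first chunk or emit clicks at the seams. A minimal sketch of an alternative that joins the sample arrays before writing a single file (write_combined is a hypothetical helper; it assumes each chunk's audio_arr is a 1-D array at SAMPLE_RATE):

    import os
    import tempfile

    import numpy as np
    from scipy.io.wavfile import write as write_wav

    def write_combined(audio_arrays, sample_rate):
        # Join the raw samples first, then write one structurally valid WAV file.
        combined = np.concatenate(audio_arrays)
        path = os.path.join(tempfile.gettempdir(), "parler_tts_combined_audio.wav")
        write_wav(path, sample_rate, combined)
        return path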
@@ -553,14 +584,14 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
         audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy')
         audio_input.stream(transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="SAMLOne_real_time")
 
-        # gr.Markdown("<h1 style='color: red;'>Map</h1>", elem_id="location-markdown")
-        # location_output = gr.HTML()
-        # bot_msg.then(show_map_if_details, [chatbot, choice], [location_output, location_output])
+        gr.Markdown("<h1 style='color: red;'>Map</h1>", elem_id="location-markdown")
+        location_output = gr.HTML()
+        bot_msg.then(show_map_if_details, [chatbot, choice], [location_output, location_output])
 
-    # with gr.Column():
-    #     weather_output = gr.HTML(value=fetch_local_weather())
-    #     news_output = gr.HTML(value=fetch_local_news())
-    #     events_output = gr.HTML(value=fetch_local_events())
+    with gr.Column():
+        weather_output = gr.HTML(value=fetch_local_weather())
+        news_output = gr.HTML(value=fetch_local_news())
+        events_output = gr.HTML(value=fetch_local_events())
 
     with gr.Column():
         image_output_1 = gr.Image(value=generate_image(hardcoded_prompt_1), width=400, height=400)
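
Note: re-enabling these components means fetch_local_weather(), fetch_local_news(), and fetch_local_events() all run while the Blocks layout is being built, so a slow upstream API delays startup. A sketch of deferring the calls to page load instead (placed inside the same with gr.Blocks(...) as demo: block, assuming import gradio as gr):

    with gr.Column():
        weather_output = gr.HTML()
        news_output = gr.HTML()
        events_output = gr.HTML()

    # demo.load fires when a browser session connects, not at build time
    demo.load(fetch_local_weather, inputs=None, outputs=weather_output)
    demo.load(fetch_local_news, inputs=None, outputs=news_output)
    demo.load(fetch_local_events, inputs=None, outputs=events_output)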
@@ -572,3 +603,4 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
 
 demo.queue()
 demo.launch(share=True)
+